1 /* 2 Provides an interface to the MUMPS sparse solver 3 */ 4 #include <petscpkg_version.h> 5 #include <petscsf.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 #include <petsc/private/vecimpl.h> 10 11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 15 #include <cmumps_c.h> 16 #include <zmumps_c.h> 17 #include <smumps_c.h> 18 #include <dmumps_c.h> 19 #else 20 #if defined(PETSC_USE_COMPLEX) 21 #if defined(PETSC_USE_REAL_SINGLE) 22 #include <cmumps_c.h> 23 #define MUMPS_c cmumps_c 24 #define MumpsScalar CMUMPS_COMPLEX 25 #else 26 #include <zmumps_c.h> 27 #define MUMPS_c zmumps_c 28 #define MumpsScalar ZMUMPS_COMPLEX 29 #endif 30 #else 31 #if defined(PETSC_USE_REAL_SINGLE) 32 #include <smumps_c.h> 33 #define MUMPS_c smumps_c 34 #define MumpsScalar SMUMPS_REAL 35 #else 36 #include <dmumps_c.h> 37 #define MUMPS_c dmumps_c 38 #define MumpsScalar DMUMPS_REAL 39 #endif 40 #endif 41 #endif 42 #if defined(PETSC_USE_COMPLEX) 43 #if defined(PETSC_USE_REAL_SINGLE) 44 #define MUMPS_STRUC_C CMUMPS_STRUC_C 45 #else 46 #define MUMPS_STRUC_C ZMUMPS_STRUC_C 47 #endif 48 #else 49 #if defined(PETSC_USE_REAL_SINGLE) 50 #define MUMPS_STRUC_C SMUMPS_STRUC_C 51 #else 52 #define MUMPS_STRUC_C DMUMPS_STRUC_C 53 #endif 54 #endif 55 EXTERN_C_END 56 57 #define JOB_INIT -1 58 #define JOB_NULL 0 59 #define JOB_FACTSYMBOLIC 1 60 #define JOB_FACTNUMERIC 2 61 #define JOB_SOLVE 3 62 #define JOB_END -2 63 64 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 65 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 66 naming convention in PetscMPIInt, PetscBLASInt etc. 
67 */ 68 typedef MUMPS_INT PetscMUMPSInt; 69 70 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 71 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 72 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 73 #endif 74 #else 75 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 76 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 77 #endif 78 #endif 79 80 #define MPIU_MUMPSINT MPI_INT 81 #define PETSC_MUMPS_INT_MAX 2147483647 82 #define PETSC_MUMPS_INT_MIN -2147483648 83 84 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 85 static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b) 86 { 87 PetscFunctionBegin; 88 #if PetscDefined(USE_64BIT_INDICES) 89 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 90 #endif 91 *b = (PetscMUMPSInt)a; 92 PetscFunctionReturn(PETSC_SUCCESS); 93 } 94 95 /* Put these utility routines here since they are only used in this file */ 96 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 97 { 98 PetscInt myval; 99 PetscBool myset; 100 101 PetscFunctionBegin; 102 /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 103 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 104 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 105 if (set) *set = myset; 106 PetscFunctionReturn(PETSC_SUCCESS); 107 } 108 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 109 110 // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C. 111 // 112 // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type. 113 // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size 114 // arrays within the types are directly linked. At the end, we only need to copy ~20 intergers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall(). 115 // 116 // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface. 117 // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the the actual precision to be used. 118 // Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers. 119 typedef struct { 120 PetscPrecision precision; // precision used by MUMPS 121 void *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex 122 123 // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions. 
124 MUMPS_INT sym, par, job; 125 MUMPS_INT comm_fortran; /* Fortran communicator */ 126 MUMPS_INT *icntl; 127 void *cntl; // MumpsReal, fixed size array 128 MUMPS_INT n; 129 MUMPS_INT nblk; 130 131 /* Assembled entry */ 132 MUMPS_INT8 nnz; 133 MUMPS_INT *irn; 134 MUMPS_INT *jcn; 135 void *a; // MumpsScalar, centralized input 136 PetscCount a_len; 137 138 /* Distributed entry */ 139 MUMPS_INT8 nnz_loc; 140 MUMPS_INT *irn_loc; 141 MUMPS_INT *jcn_loc; 142 void *a_loc; // MumpsScalar, distributed input 143 PetscCount a_loc_len; 144 145 /* Matrix by blocks */ 146 MUMPS_INT *blkptr; 147 MUMPS_INT *blkvar; 148 149 /* Ordering, if given by user */ 150 MUMPS_INT *perm_in; 151 152 /* RHS, solution, ouptput data and statistics */ 153 void *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc; // MumpsScalar buffers 154 PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar 155 156 MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc; 157 MUMPS_INT nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc; 158 // MUMPS_INT nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0) 159 MUMPS_INT schur_lld; 160 MUMPS_INT *info, *infog; // fixed size array 161 void *rinfo, *rinfog; // MumpsReal, fixed size array 162 163 /* Null space */ 164 MUMPS_INT *pivnul_list; // allocated by MUMPS! 165 MUMPS_INT *mapping; // allocated by MUMPS! 
166 167 /* Schur */ 168 MUMPS_INT size_schur; 169 MUMPS_INT *listvar_schur; 170 void *schur; // MumpsScalar 171 PetscCount schur_len; 172 173 /* For out-of-core */ 174 char *ooc_tmpdir; // fixed size array 175 char *ooc_prefix; // fixed size array 176 } XMUMPS_STRUC_C; 177 178 // Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct 179 #define AllocatInternalID(MUMPS_STRUC_T, outer) \ 180 do { \ 181 MUMPS_STRUC_T *inner; \ 182 PetscCall(PetscNew(&inner)); \ 183 outer->icntl = inner->icntl; \ 184 outer->cntl = inner->cntl; \ 185 outer->info = inner->info; \ 186 outer->infog = inner->infog; \ 187 outer->rinfo = inner->rinfo; \ 188 outer->rinfog = inner->rinfog; \ 189 outer->ooc_tmpdir = inner->ooc_tmpdir; \ 190 outer->ooc_prefix = inner->ooc_prefix; \ 191 /* the three field should never change after init */ \ 192 inner->comm_fortran = outer->comm_fortran; \ 193 inner->par = outer->par; \ 194 inner->sym = outer->sym; \ 195 outer->internal_id = inner; \ 196 } while (0) 197 198 // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner 199 static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision) 200 { 201 PetscFunctionBegin; 202 outer->precision = precision; 203 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 204 #if defined(PETSC_USE_COMPLEX) 205 if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer); 206 else AllocatInternalID(ZMUMPS_STRUC_C, outer); 207 #else 208 if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer); 209 else AllocatInternalID(DMUMPS_STRUC_C, outer); 210 #endif 211 #else 212 AllocatInternalID(MUMPS_STRUC_C, outer); 213 #endif 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \ 218 do { \ 219 MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \ 220 PetscCall(PetscFree(inner->a)); \ 221 
PetscCall(PetscFree(inner->a_loc)); \ 222 PetscCall(PetscFree(inner->redrhs)); \ 223 PetscCall(PetscFree(inner->rhs)); \ 224 PetscCall(PetscFree(inner->rhs_sparse)); \ 225 PetscCall(PetscFree(inner->rhs_loc)); \ 226 PetscCall(PetscFree(inner->sol_loc)); \ 227 PetscCall(PetscFree(inner->schur)); \ 228 } while (0) 229 230 static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer) 231 { 232 PetscFunctionBegin; 233 if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer 234 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 235 const PetscPrecision mumps_precision = outer->precision; 236 if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision 237 #if defined(PETSC_USE_COMPLEX) 238 if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer); 239 else FreeInternalIDFields(ZMUMPS_STRUC_C, outer); 240 #else 241 if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer); 242 else FreeInternalIDFields(DMUMPS_STRUC_C, outer); 243 #endif 244 } 245 #endif 246 PetscCall(PetscFree(outer->internal_id)); 247 } 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>. 252 // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array. 253 // (For bravity, we use <ma> for array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>) 254 // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request. 255 // 256 // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same. 
257 // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output. 258 // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output. 259 // 260 // 261 // Input parameters: 262 // + convert - whether to do PetscScalar to MumpsScalar conversion 263 // . n - length of the PetscScalar array 264 // . pa - [n]], points to the PetscScalar array 265 // . precision - precision of MumpsScalar 266 // . m - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero. 267 // - ma - on input, an existing MumpsScalar array if any. 268 // 269 // Output parameters: 270 // + m - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0 271 // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same. 272 // 273 // Note: 274 // New memory, if allocated, is done via PetscMalloc1(), and is owned by caller. 275 static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma) 276 { 277 PetscFunctionBegin; 278 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 279 const PetscPrecision mumps_precision = precision; 280 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precicison (%d). 
Must be single or double", (int)precision); 281 #if defined(PETSC_USE_COMPLEX) 282 if (mumps_precision != PETSC_SCALAR_PRECISION) { 283 if (mumps_precision == PETSC_PRECISION_SINGLE) { 284 if (*m < n) { 285 PetscCall(PetscFree(*ma)); 286 PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma)); 287 *m = n; 288 } 289 if (convert) { 290 CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma; 291 for (PetscCount i = 0; i < n; i++) { 292 b[i].r = PetscRealPart(pa[i]); 293 b[i].i = PetscImaginaryPart(pa[i]); 294 } 295 } 296 } else { 297 if (*m < n) { 298 PetscCall(PetscFree(*ma)); 299 PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma)); 300 *m = n; 301 } 302 if (convert) { 303 ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma; 304 for (PetscCount i = 0; i < n; i++) { 305 b[i].r = PetscRealPart(pa[i]); 306 b[i].i = PetscImaginaryPart(pa[i]); 307 } 308 } 309 } 310 } 311 #else 312 if (mumps_precision != PETSC_SCALAR_PRECISION) { 313 if (mumps_precision == PETSC_PRECISION_SINGLE) { 314 if (*m < n) { 315 PetscCall(PetscFree(*ma)); 316 PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma)); 317 *m = n; 318 } 319 if (convert) { 320 SMUMPS_REAL *b = *(SMUMPS_REAL **)ma; 321 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 322 } 323 } else { 324 if (*m < n) { 325 PetscCall(PetscFree(*ma)); 326 PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma)); 327 *m = n; 328 } 329 if (convert) { 330 DMUMPS_REAL *b = *(DMUMPS_REAL **)ma; 331 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 332 } 333 } 334 } 335 #endif 336 else 337 #endif 338 { 339 if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any 340 *ma = (void *)pa; // same precision, make them alias 341 *m = 0; 342 } 343 PetscFunctionReturn(PETSC_SUCCESS); 344 } 345 346 // Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>. 347 // 348 // 1) If the two types are different, cast array elements. 349 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 
350 static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa) 351 { 352 PetscFunctionBegin; 353 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 354 if (mumps_precision != PETSC_SCALAR_PRECISION) { 355 #if defined(PETSC_USE_COMPLEX) 356 if (mumps_precision == PETSC_PRECISION_SINGLE) { 357 PetscReal *a = (PetscReal *)pa; 358 const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 359 for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 360 } else { 361 PetscReal *a = (PetscReal *)pa; 362 const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 363 for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 364 } 365 #else 366 if (mumps_precision == PETSC_PRECISION_SINGLE) { 367 const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 368 for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 369 } else { 370 const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 371 for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 372 } 373 #endif 374 } else 375 #endif 376 PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n)); 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 // Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>. 381 // 382 // 1) If the two types are different, cast array elements. 383 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 
384 static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma) 385 { 386 PetscFunctionBegin; 387 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 388 if (mumps_precision != PETSC_SCALAR_PRECISION) { 389 #if defined(PETSC_USE_COMPLEX) 390 if (mumps_precision == PETSC_PRECISION_SINGLE) { 391 CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma; 392 for (PetscCount i = 0; i < n; i++) { 393 b[i].r = PetscRealPart(pa[i]); 394 b[i].i = PetscImaginaryPart(pa[i]); 395 } 396 } else { 397 ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma; 398 for (PetscCount i = 0; i < n; i++) { 399 b[i].r = PetscRealPart(pa[i]); 400 b[i].i = PetscImaginaryPart(pa[i]); 401 } 402 } 403 #else 404 if (mumps_precision == PETSC_PRECISION_SINGLE) { 405 SMUMPS_REAL *b = (SMUMPS_REAL *)ma; 406 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 407 } else { 408 DMUMPS_REAL *b = (DMUMPS_REAL *)ma; 409 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 410 } 411 #endif 412 } else 413 #endif 414 PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n)); 415 PetscFunctionReturn(PETSC_SUCCESS); 416 } 417 418 static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id) 419 { 420 return id->precision == PETSC_PRECISION_DOUBLE ? 
MPI_DOUBLE : MPI_FLOAT; 421 } 422 423 #define PreMumpsCall(inner, outer, mumpsscalar) \ 424 do { \ 425 inner->job = outer->job; \ 426 inner->n = outer->n; \ 427 inner->nblk = outer->nblk; \ 428 inner->nnz = outer->nnz; \ 429 inner->irn = outer->irn; \ 430 inner->jcn = outer->jcn; \ 431 inner->a = (mumpsscalar *)outer->a; \ 432 inner->nnz_loc = outer->nnz_loc; \ 433 inner->irn_loc = outer->irn_loc; \ 434 inner->jcn_loc = outer->jcn_loc; \ 435 inner->a_loc = (mumpsscalar *)outer->a_loc; \ 436 inner->blkptr = outer->blkptr; \ 437 inner->blkvar = outer->blkvar; \ 438 inner->perm_in = outer->perm_in; \ 439 inner->rhs = (mumpsscalar *)outer->rhs; \ 440 inner->redrhs = (mumpsscalar *)outer->redrhs; \ 441 inner->rhs_sparse = (mumpsscalar *)outer->rhs_sparse; \ 442 inner->sol_loc = (mumpsscalar *)outer->sol_loc; \ 443 inner->rhs_loc = (mumpsscalar *)outer->rhs_loc; \ 444 inner->irhs_sparse = outer->irhs_sparse; \ 445 inner->irhs_ptr = outer->irhs_ptr; \ 446 inner->isol_loc = outer->isol_loc; \ 447 inner->irhs_loc = outer->irhs_loc; \ 448 inner->nrhs = outer->nrhs; \ 449 inner->lrhs = outer->lrhs; \ 450 inner->lredrhs = outer->lredrhs; \ 451 inner->nz_rhs = outer->nz_rhs; \ 452 inner->lsol_loc = outer->lsol_loc; \ 453 inner->nloc_rhs = outer->nloc_rhs; \ 454 inner->lrhs_loc = outer->lrhs_loc; \ 455 inner->schur_lld = outer->schur_lld; \ 456 inner->size_schur = outer->size_schur; \ 457 inner->listvar_schur = outer->listvar_schur; \ 458 inner->schur = (mumpsscalar *)outer->schur; \ 459 } while (0) 460 461 #define PostMumpsCall(inner, outer) \ 462 do { \ 463 outer->pivnul_list = inner->pivnul_list; \ 464 outer->mapping = inner->mapping; \ 465 } while (0) 466 467 // Entry for PETSc to call mumps 468 static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer) 469 { 470 PetscFunctionBegin; 471 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 472 #if defined(PETSC_USE_COMPLEX) 473 if (outer->precision == PETSC_PRECISION_SINGLE) { 474 CMUMPS_STRUC_C *inner = 
(CMUMPS_STRUC_C *)outer->internal_id; 475 PreMumpsCall(inner, outer, CMUMPS_COMPLEX); 476 PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner)); 477 PostMumpsCall(inner, outer); 478 } else { 479 ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id; 480 PreMumpsCall(inner, outer, ZMUMPS_COMPLEX); 481 PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner)); 482 PostMumpsCall(inner, outer); 483 } 484 #else 485 if (outer->precision == PETSC_PRECISION_SINGLE) { 486 SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id; 487 PreMumpsCall(inner, outer, SMUMPS_REAL); 488 PetscStackCallExternalVoid("smumps_c", smumps_c(inner)); 489 PostMumpsCall(inner, outer); 490 } else { 491 DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id; 492 PreMumpsCall(inner, outer, DMUMPS_REAL); 493 PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner)); 494 PostMumpsCall(inner, outer); 495 } 496 #endif 497 #else 498 MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id; 499 PreMumpsCall(inner, outer, MumpsScalar); 500 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner)); 501 PostMumpsCall(inner, outer); 502 #endif 503 PetscFunctionReturn(PETSC_SUCCESS); 504 } 505 506 /* macros s.t. indices match MUMPS documentation */ 507 #define ICNTL(I) icntl[(I) - 1] 508 #define INFOG(I) infog[(I) - 1] 509 #define INFO(I) info[(I) - 1] 510 511 // Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I! 512 #if defined(PETSC_USE_COMPLEX) 513 #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i) 514 #else 515 #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I]) 516 #endif 517 518 // Get a value from MumpsReal arrays. 
The value is convertible to PetscReal. 519 #define ID_CNTL_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1]) 520 #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1]) 521 #define ID_RINFO_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1]) 522 523 // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL> 524 #define ID_CNTL_SET(ID, I, VAL) \ 525 do { \ 526 if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \ 527 else ((double *)(ID).cntl)[(I) - 1] = (VAL); \ 528 } while (0) 529 530 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 531 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 532 #define PetscMUMPS_c(mumps) \ 533 do { \ 534 if (mumps->use_petsc_omp_support) { \ 535 if (mumps->is_omp_master) { \ 536 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 537 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 538 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 539 PetscCall(PetscFPTrapPop()); \ 540 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 541 } \ 542 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 543 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 544 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 545 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 546 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. 
\ 547 */ \ 548 MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \ 549 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 550 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 551 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 552 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 553 } else { \ 554 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 555 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 556 PetscCall(PetscFPTrapPop()); \ 557 } \ 558 } while (0) 559 #else 560 #define PetscMUMPS_c(mumps) \ 561 do { \ 562 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 563 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 564 PetscCall(PetscFPTrapPop()); \ 565 } while (0) 566 #endif 567 568 typedef struct Mat_MUMPS Mat_MUMPS; 569 struct Mat_MUMPS { 570 XMUMPS_STRUC_C id; 571 572 MatStructure matstruc; 573 PetscMPIInt myid, petsc_size; 574 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 575 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 576 PetscCount nnz; /* number of nonzeros. 
The type is called selective 64-bit in mumps */ 577 PetscMUMPSInt sym; 578 MPI_Comm mumps_comm; 579 PetscMUMPSInt *ICNTL_pre; 580 PetscReal *CNTL_pre; 581 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 582 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 583 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 584 PetscMUMPSInt ICNTL26; 585 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 586 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 587 PetscInt *rhs_nrow, max_nrhs; 588 PetscMPIInt *rhs_recvcounts, *rhs_disps; 589 PetscScalar *rhs_loc, *rhs_recvbuf; 590 #endif 591 Vec b_seq, x_seq; 592 PetscInt ninfo, *info; /* which INFO to display */ 593 PetscInt sizeredrhs; 594 PetscScalar *schur_sol; 595 PetscInt schur_sizesol; 596 PetscScalar *redrhs; // buffer in PetscScalar in case MumpsScalar is in a different precision 597 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 598 PetscCount cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 599 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 600 601 /* Support for MATNEST */ 602 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 603 PetscCount *nest_vals_start; 604 PetscScalar *nest_vals; 605 606 /* stuff used by petsc/mumps OpenMP support*/ 607 PetscBool use_petsc_omp_support; 608 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 609 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */ 610 PetscCount *recvcount; /* a collection of nnz on omp_master */ 611 PetscMPIInt tag, omp_comm_size; 612 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 613 MPI_Request *reqs; 614 }; 615 616 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 
617 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 618 */ 619 static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 620 { 621 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */ 622 623 PetscFunctionBegin; 624 #if defined(PETSC_USE_64BIT_INDICES) 625 { 626 PetscInt i; 627 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 628 PetscCall(PetscFree(mumps->ia_alloc)); 629 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 630 mumps->cur_ilen = nrow + 1; 631 } 632 if (nnz > mumps->cur_jlen) { 633 PetscCall(PetscFree(mumps->ja_alloc)); 634 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 635 mumps->cur_jlen = nnz; 636 } 637 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i])); 638 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i])); 639 *ia_mumps = mumps->ia_alloc; 640 *ja_mumps = mumps->ja_alloc; 641 } 642 #else 643 *ia_mumps = ia; 644 *ja_mumps = ja; 645 #endif 646 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 647 PetscFunctionReturn(PETSC_SUCCESS); 648 } 649 650 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 651 { 652 PetscFunctionBegin; 653 PetscCall(PetscFree(mumps->id.listvar_schur)); 654 PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID() 655 PetscCall(PetscFree(mumps->schur_sol)); 656 mumps->id.size_schur = 0; 657 mumps->id.schur_lld = 0; 658 if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is yet built 659 PetscFunctionReturn(PETSC_SUCCESS); 660 } 661 662 /* solve with rhs in mumps->id.redrhs and return in the same location */ 663 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 664 { 665 Mat_MUMPS *mumps 
= (Mat_MUMPS *)F->data; 666 Mat S, B, X; // solve S*X = B; all three matrices are dense 667 MatFactorSchurStatus schurstatus; 668 PetscInt sizesol; 669 const PetscScalar *xarray; 670 671 PetscFunctionBegin; 672 PetscCall(MatFactorFactorizeSchurComplement(F)); 673 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 674 PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs)); 675 676 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B)); 677 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 678 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 679 PetscCall(MatBindToCPU(B, S->boundtocpu)); 680 #endif 681 switch (schurstatus) { 682 case MAT_FACTOR_SCHUR_FACTORED: 683 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X)); 684 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 685 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 686 PetscCall(MatBindToCPU(X, S->boundtocpu)); 687 #endif 688 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 689 PetscCall(MatMatSolveTranspose(S, B, X)); 690 } else { 691 PetscCall(MatMatSolve(S, B, X)); 692 } 693 break; 694 case MAT_FACTOR_SCHUR_INVERTED: 695 sizesol = mumps->id.nrhs * mumps->id.size_schur; 696 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 697 PetscCall(PetscFree(mumps->schur_sol)); 698 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 699 mumps->schur_sizesol = sizesol; 700 } 701 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 702 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 703 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 704 PetscCall(MatBindToCPU(X, S->boundtocpu)); 705 #endif 706 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 707 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 708 PetscCall(MatProductSetType(X, 
MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
  PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
  PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
  PetscCall(MatDenseRestoreArrayRead(X, &xarray));
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMumpsHandleSchur_Private - bracket a MUMPS solve that involves a Schur complement (ICNTL(19) != 0)

  Input:
    F         - the factored matrix
    expansion - PETSC_FALSE: condensation step before the solve; make sure the MUMPS reduced right-hand side
                array id.redrhs (and its PetscScalar mirror mumps->redrhs) is large enough for nrhs columns
                PETSC_TRUE: expansion step after the solve; solve with the Schur complement ourselves
                (MatMumpsSolveSchur_Private()) and then run the MUMPS expansion phase (ICNTL(26) = 2)

  No-op when no Schur complement was requested (ICNTL(19) == 0).
*/
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      mumps->id.lredrhs = mumps->id.size_schur;
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
      /* id.redrhs_len != 0 means id.redrhs is a separate allocation (mixed-precision path) — free it too */
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision,
&mumps->id.redrhs_len, &mumps->id.redrhs));
    }
  } else { /* prepare for the expansion step */
    PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
    mumps->id.ICNTL(26) = 2;                  /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      mumps->id.redrhs_len = 0;
      mumps->id.lredrhs    = 0;
      mumps->sizeredrhs    = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij,baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX: only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
784 785 */ 786 787 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 788 { 789 const PetscScalar *av; 790 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 791 PetscCount nz, rnz, k; 792 PetscMUMPSInt *row, *col; 793 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 794 795 PetscFunctionBegin; 796 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 797 if (reuse == MAT_INITIAL_MATRIX) { 798 nz = aa->nz; 799 ai = aa->i; 800 aj = aa->j; 801 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 802 for (PetscCount i = k = 0; i < M; i++) { 803 rnz = ai[i + 1] - ai[i]; 804 ajj = aj + ai[i]; 805 for (PetscCount j = 0; j < rnz; j++) { 806 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 807 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 808 k++; 809 } 810 } 811 mumps->val = (PetscScalar *)av; 812 mumps->irn = row; 813 mumps->jcn = col; 814 mumps->nnz = nz; 815 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */ 816 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 817 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 818 PetscFunctionReturn(PETSC_SUCCESS); 819 } 820 821 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 822 { 823 PetscCount nz, i, j, k, r; 824 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 825 PetscMUMPSInt *row, *col; 826 827 PetscFunctionBegin; 828 nz = a->sliidx[a->totalslices]; 829 if (reuse == MAT_INITIAL_MATRIX) { 830 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 831 for (i = k = 0; i < a->totalslices; i++) { 832 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 833 } 834 for (i = 0; i < nz; i++) 
PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 835 mumps->irn = row; 836 mumps->jcn = col; 837 mumps->nnz = nz; 838 mumps->val = a->val; 839 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */ 840 else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 841 PetscFunctionReturn(PETSC_SUCCESS); 842 } 843 844 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 845 { 846 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 847 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 848 PetscCount M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 849 PetscInt bs; 850 PetscMUMPSInt *row, *col; 851 852 PetscFunctionBegin; 853 if (reuse == MAT_INITIAL_MATRIX) { 854 PetscCall(MatGetBlockSize(A, &bs)); 855 M = A->rmap->N / bs; 856 ai = aa->i; 857 aj = aa->j; 858 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 859 for (i = 0; i < M; i++) { 860 ajj = aj + ai[i]; 861 rnz = ai[i + 1] - ai[i]; 862 for (k = 0; k < rnz; k++) { 863 for (j = 0; j < bs; j++) { 864 for (m = 0; m < bs; m++) { 865 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 866 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 867 idx++; 868 } 869 } 870 } 871 } 872 mumps->irn = row; 873 mumps->jcn = col; 874 mumps->nnz = nz; 875 mumps->val = aa->a; 876 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 877 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 878 PetscFunctionReturn(PETSC_SUCCESS); 879 } 880 881 static PetscErrorCode 
MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 882 { 883 const PetscInt *ai, *aj, *ajj; 884 PetscInt bs; 885 PetscCount nz, rnz, i, j, k, m; 886 PetscMUMPSInt *row, *col; 887 PetscScalar *val; 888 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 889 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 890 #if defined(PETSC_USE_COMPLEX) 891 PetscBool isset, hermitian; 892 #endif 893 894 PetscFunctionBegin; 895 #if defined(PETSC_USE_COMPLEX) 896 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 897 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 898 #endif 899 ai = aa->i; 900 aj = aa->j; 901 PetscCall(MatGetBlockSize(A, &bs)); 902 if (reuse == MAT_INITIAL_MATRIX) { 903 const PetscCount alloc_size = aa->nz * bs2; 904 905 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 906 if (bs > 1) { 907 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 908 mumps->val = mumps->val_alloc; 909 } else { 910 mumps->val = aa->a; 911 } 912 mumps->irn = row; 913 mumps->jcn = col; 914 } else { 915 row = mumps->irn; 916 col = mumps->jcn; 917 } 918 val = mumps->val; 919 920 nz = 0; 921 if (bs > 1) { 922 for (i = 0; i < mbs; i++) { 923 rnz = ai[i + 1] - ai[i]; 924 ajj = aj + ai[i]; 925 for (j = 0; j < rnz; j++) { 926 for (k = 0; k < bs; k++) { 927 for (m = 0; m < bs; m++) { 928 if (ajj[j] > i || k >= m) { 929 if (reuse == MAT_INITIAL_MATRIX) { 930 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 931 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 932 } 933 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 934 } 935 } 936 } 937 } 938 } 939 } else if (reuse == MAT_INITIAL_MATRIX) { 940 for (i = 0; i < mbs; i++) { 941 rnz = ai[i + 1] - ai[i]; 942 ajj = aj + ai[i]; 943 for (j = 0; j < rnz; j++) { 944 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 945 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 
946 nz++; 947 } 948 } 949 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz); 950 } else if (mumps->nest_vals) 951 PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 952 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 953 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 954 PetscFunctionReturn(PETSC_SUCCESS); 955 } 956 957 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 958 { 959 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 960 PetscCount nz, rnz, i, j; 961 const PetscScalar *av, *v1; 962 PetscScalar *val; 963 PetscMUMPSInt *row, *col; 964 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 965 PetscBool diagDense; 966 #if defined(PETSC_USE_COMPLEX) 967 PetscBool hermitian, isset; 968 #endif 969 970 PetscFunctionBegin; 971 #if defined(PETSC_USE_COMPLEX) 972 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 973 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 974 #endif 975 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 976 ai = aa->i; 977 aj = aa->j; 978 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense)); 979 if (reuse == MAT_INITIAL_MATRIX) { 980 /* count nz in the upper triangular part of A */ 981 nz = 0; 982 if (!diagDense) { 983 for (i = 0; i < M; i++) { 984 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 985 for (j = ai[i]; j < ai[i + 1]; j++) { 986 if (aj[j] < i) continue; 987 nz++; 988 } 989 } else { 990 nz += ai[i + 1] - adiag[i]; 991 } 992 } 993 } else { 994 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 995 } 996 
PetscCall(PetscMalloc2(nz, &row, nz, &col)); 997 PetscCall(PetscMalloc1(nz, &val)); 998 mumps->nnz = nz; 999 mumps->irn = row; 1000 mumps->jcn = col; 1001 mumps->val = mumps->val_alloc = val; 1002 1003 nz = 0; 1004 if (!diagDense) { 1005 for (i = 0; i < M; i++) { 1006 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 1007 for (j = ai[i]; j < ai[i + 1]; j++) { 1008 if (aj[j] < i) continue; 1009 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1010 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 1011 val[nz] = av[j]; 1012 nz++; 1013 } 1014 } else { 1015 rnz = ai[i + 1] - adiag[i]; 1016 ajj = aj + adiag[i]; 1017 v1 = av + adiag[i]; 1018 for (j = 0; j < rnz; j++) { 1019 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1020 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1021 val[nz++] = v1[j]; 1022 } 1023 } 1024 } 1025 } else { 1026 for (i = 0; i < M; i++) { 1027 rnz = ai[i + 1] - adiag[i]; 1028 ajj = aj + adiag[i]; 1029 v1 = av + adiag[i]; 1030 for (j = 0; j < rnz; j++) { 1031 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1032 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1033 val[nz++] = v1[j]; 1034 } 1035 } 1036 } 1037 } else { 1038 nz = 0; 1039 val = mumps->val; 1040 if (!diagDense) { 1041 for (i = 0; i < M; i++) { 1042 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 1043 for (j = ai[i]; j < ai[i + 1]; j++) { 1044 if (aj[j] < i) continue; 1045 val[nz++] = av[j]; 1046 } 1047 } else { 1048 rnz = ai[i + 1] - adiag[i]; 1049 v1 = av + adiag[i]; 1050 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 1051 } 1052 } 1053 } else { 1054 for (i = 0; i < M; i++) { 1055 rnz = ai[i + 1] - adiag[i]; 1056 v1 = av + adiag[i]; 1057 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 1058 } 1059 } 1060 } 1061 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 1062 PetscFunctionReturn(PETSC_SUCCESS); 1063 } 1064 1065 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1066 { 1067 const PetscInt *ai, *aj, *bi, *bj, 
*garray, *ajj, *bjj; 1068 PetscInt bs; 1069 PetscCount rstart, nz, i, j, k, m, jj, irow, countA, countB; 1070 PetscMUMPSInt *row, *col; 1071 const PetscScalar *av, *bv, *v1, *v2; 1072 PetscScalar *val; 1073 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 1074 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data; 1075 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 1076 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 1077 #if defined(PETSC_USE_COMPLEX) 1078 PetscBool hermitian, isset; 1079 #endif 1080 1081 PetscFunctionBegin; 1082 #if defined(PETSC_USE_COMPLEX) 1083 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1084 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 1085 #endif 1086 PetscCall(MatGetBlockSize(A, &bs)); 1087 rstart = A->rmap->rstart; 1088 ai = aa->i; 1089 aj = aa->j; 1090 bi = bb->i; 1091 bj = bb->j; 1092 av = aa->a; 1093 bv = bb->a; 1094 1095 garray = mat->garray; 1096 1097 if (reuse == MAT_INITIAL_MATRIX) { 1098 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 1099 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1100 PetscCall(PetscMalloc1(nz, &val)); 1101 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 1102 mumps->irn = row; 1103 mumps->jcn = col; 1104 mumps->val = mumps->val_alloc = val; 1105 } else { 1106 val = mumps->val; 1107 } 1108 1109 jj = 0; 1110 irow = rstart; 1111 for (i = 0; i < mbs; i++) { 1112 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 1113 countA = ai[i + 1] - ai[i]; 1114 countB = bi[i + 1] - bi[i]; 1115 bjj = bj + bi[i]; 1116 v1 = av + ai[i] * bs2; 1117 v2 = bv + bi[i] * bs2; 1118 1119 if (bs > 1) { 1120 /* A-part */ 1121 for (j = 0; j < countA; j++) { 1122 for (k = 0; k < bs; k++) { 1123 for (m = 0; m < bs; m++) { 1124 if (rstart + ajj[j] * bs > irow || k >= m) { 1125 if (reuse == MAT_INITIAL_MATRIX) { 1126 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 1127 
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs]; /* blocks are stored column-major */
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
            }
            val[jj++] = v2[j * bs2 + m + k * bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j = 0; j < countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; /* exact count known only now (SBAIJ diagonal blocks were filtered) */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert an MPIAIJ matrix to COO triples with global indices; the diagonal (A) and off-diagonal (B)
   parts are interleaved row by row, and values are always copied into a single allocation */
static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)Ad->data;
  bb = (Mat_SeqAIJ *)Ao->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;
  cstart = A->cmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part: garray maps local off-diagonal columns to global columns */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert an MPIBAIJ matrix to COO triples with global indices, expanding each bs x bs block
   (stored column-major) into scalar entries; values are always copied */
static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ    *aa  = (Mat_SeqBAIJ *)mat->A->data;
  Mat_SeqBAIJ    *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt  bs2 = mat->bs2;
  PetscInt        bs;
  PetscCount      nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt  *row, *col;
const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 1256 PetscScalar *val; 1257 1258 PetscFunctionBegin; 1259 PetscCall(MatGetBlockSize(A, &bs)); 1260 if (reuse == MAT_INITIAL_MATRIX) { 1261 nz = bs2 * (aa->nz + bb->nz); 1262 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1263 PetscCall(PetscMalloc1(nz, &val)); 1264 mumps->nnz = nz; 1265 mumps->irn = row; 1266 mumps->jcn = col; 1267 mumps->val = mumps->val_alloc = val; 1268 } else { 1269 val = mumps->val; 1270 } 1271 1272 jj = 0; 1273 irow = rstart; 1274 for (i = 0; i < mbs; i++) { 1275 countA = ai[i + 1] - ai[i]; 1276 countB = bi[i + 1] - bi[i]; 1277 ajj = aj + ai[i]; 1278 bjj = bj + bi[i]; 1279 v1 = av + bs2 * ai[i]; 1280 v2 = bv + bs2 * bi[i]; 1281 1282 idx = 0; 1283 /* A-part */ 1284 for (k = 0; k < countA; k++) { 1285 for (j = 0; j < bs; j++) { 1286 for (n = 0; n < bs; n++) { 1287 if (reuse == MAT_INITIAL_MATRIX) { 1288 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 1289 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 1290 } 1291 val[jj++] = v1[idx++]; 1292 } 1293 } 1294 } 1295 1296 idx = 0; 1297 /* B-part */ 1298 for (k = 0; k < countB; k++) { 1299 for (j = 0; j < bs; j++) { 1300 for (n = 0; n < bs; n++) { 1301 if (reuse == MAT_INITIAL_MATRIX) { 1302 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 1303 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 1304 } 1305 val[jj++] = v2[idx++]; 1306 } 1307 } 1308 } 1309 irow += bs; 1310 } 1311 PetscFunctionReturn(PETSC_SUCCESS); 1312 } 1313 1314 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1315 { 1316 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 1317 PetscCount rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 1318 PetscMUMPSInt *row, *col; 1319 const PetscScalar *av, *bv, *v1, *v2; 1320 PetscScalar *val; 1321 Mat Ad, Ao; 1322 Mat_SeqAIJ *aa; 1323 Mat_SeqAIJ *bb; 1324 #if 
defined(PETSC_USE_COMPLEX) 1325 PetscBool hermitian, isset; 1326 #endif 1327 1328 PetscFunctionBegin; 1329 #if defined(PETSC_USE_COMPLEX) 1330 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1331 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 1332 #endif 1333 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 1334 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 1335 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1336 1337 aa = (Mat_SeqAIJ *)Ad->data; 1338 bb = (Mat_SeqAIJ *)Ao->data; 1339 ai = aa->i; 1340 aj = aa->j; 1341 bi = bb->i; 1342 bj = bb->j; 1343 PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL)); 1344 rstart = A->rmap->rstart; 1345 1346 if (reuse == MAT_INITIAL_MATRIX) { 1347 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 1348 nzb = 0; /* num of upper triangular entries in mat->B */ 1349 for (i = 0; i < m; i++) { 1350 nza += (ai[i + 1] - adiag[i]); 1351 countB = bi[i + 1] - bi[i]; 1352 bjj = bj + bi[i]; 1353 for (j = 0; j < countB; j++) { 1354 if (garray[bjj[j]] > rstart) nzb++; 1355 } 1356 } 1357 1358 nz = nza + nzb; /* total nz of upper triangular part of mat */ 1359 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1360 PetscCall(PetscMalloc1(nz, &val)); 1361 mumps->nnz = nz; 1362 mumps->irn = row; 1363 mumps->jcn = col; 1364 mumps->val = mumps->val_alloc = val; 1365 } else { 1366 val = mumps->val; 1367 } 1368 1369 jj = 0; 1370 irow = rstart; 1371 for (i = 0; i < m; i++) { 1372 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 1373 v1 = av + adiag[i]; 1374 countA = ai[i + 1] - adiag[i]; 1375 countB = bi[i + 1] - bi[i]; 1376 bjj = bj + bi[i]; 1377 v2 = bv + bi[i]; 1378 1379 /* A-part */ 1380 for (j = 0; j < countA; j++) { 1381 if (reuse == MAT_INITIAL_MATRIX) { 1382 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1383 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 1384 } 
      val[jj++] = v1[j];
    }

    /* B-part: keep only entries strictly in the upper triangle (global column > first local row) */
    for (j = 0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a MATDIAGONAL matrix to COO triples: one entry per local row, row == col (global indices);
   values alias the diagonal Vec array unless inside a MATNEST.
   NOTE(review): shift is unused here — indices appear to stay 0-based regardless of the output style; confirm callers rely on that */
static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt     M = A->rmap->n;
  PetscCount         i;
  PetscMUMPSInt     *row, *col;
  Vec                v;

  PetscFunctionBegin;
  PetscCall(MatDiagonalGetDiagonal(A, &v));
  PetscCall(VecGetArrayRead(v, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(M, &row, M, &col));
    for (i = 0; i < M; i++) {
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
      col[i] = row[i];
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = M;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(VecRestoreArrayRead(v, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a dense matrix to COO triples (all m*N local entries, column by column); values alias the dense
   array when lda == m, otherwise they are compacted column-wise into a separate allocation */
static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscScalar   *v;
  const PetscInt m = A->rmap->n, N = A->cmap->N;
  PetscInt       lda;
  PetscCount     i, j;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(A, &lda));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
    /* first column: col = 0, rows are the global row indices */
    for (i = 0; i < m; i++) {
      col[i] = 0;
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
    }
    /* remaining columns: constant col = j, rows copied from the previous column */
    for (j = 1; j < N; j++) {
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
      PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
    }
    if (lda == m) mumps->val = v; /* contiguous storage: alias, no copy */
    else {
      PetscCall(PetscMalloc1(m * N, &mumps->val));
      mumps->val_alloc = mumps->val;
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = m * N;
  } else {
    if (lda == m && !mumps->nest_vals) mumps->val = v;
    else {
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a
// MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated
// and its rows and columns permuted
// TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
{
  Mat         A;
  PetscScalar s[2]; /* s[0] = shift, s[1] = scale of the current virtual-transpose layer */
  PetscBool   isTrans, isHTrans, compare;

  PetscFunctionBegin;
  do {
    PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
    if (isTrans) {
      PetscCall(MatTransposeGetMat(*sub, &A));
      isHTrans =
PETSC_FALSE; 1488 } else { 1489 PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1490 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A)); 1491 } 1492 compare = (PetscBool)(isTrans || isHTrans); 1493 if (compare) { 1494 if (vshift && vscale) { 1495 PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 1496 if (!*conjugate) { 1497 *vshift += s[0] * *vscale; 1498 *vscale *= s[1]; 1499 } else { 1500 *vshift += PetscConj(s[0]) * *vscale; 1501 *vscale *= PetscConj(s[1]); 1502 } 1503 } 1504 if (swap) *swap = (PetscBool)!*swap; 1505 if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate; 1506 *sub = A; 1507 } 1508 } while (compare); 1509 PetscFunctionReturn(PETSC_SUCCESS); 1510 } 1511 1512 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1513 { 1514 Mat **mats; 1515 PetscInt nr, nc; 1516 PetscBool chol = mumps->sym ? 
PETSC_TRUE : PETSC_FALSE; 1517 1518 PetscFunctionBegin; 1519 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1520 if (reuse == MAT_INITIAL_MATRIX) { 1521 PetscMUMPSInt *irns, *jcns; 1522 PetscScalar *vals; 1523 PetscCount totnnz, cumnnz, maxnnz; 1524 PetscInt *pjcns_w, Mbs = 0; 1525 IS *rows, *cols; 1526 PetscInt **rows_idx, **cols_idx; 1527 1528 cumnnz = 0; 1529 maxnnz = 0; 1530 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1531 for (PetscInt r = 0; r < nr; r++) { 1532 for (PetscInt c = 0; c < nc; c++) { 1533 Mat sub = mats[r][c]; 1534 1535 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1536 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1537 if (sub) { 1538 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1539 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 1540 MatInfo info; 1541 1542 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 1543 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1544 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1545 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1546 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1547 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1548 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1549 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1550 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 1551 1552 if (chol) { 1553 if (r == c) { 1554 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1555 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1556 else if (isSeqSBAIJ) convert_to_triples = 
MatConvertToTriples_seqsbaij_seqsbaij; 1557 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1558 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1559 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1560 } else { 1561 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1562 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1563 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1564 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1565 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1566 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1567 } 1568 } else { 1569 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1570 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1571 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1572 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1573 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1574 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1575 } 1576 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1577 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1578 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1579 cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */ 1580 maxnnz = PetscMax(maxnnz, info.nz_used); 1581 } 1582 } 1583 } 1584 1585 /* Allocate total COO */ 1586 totnnz = cumnnz; 1587 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1588 PetscCall(PetscMalloc1(totnnz, &vals)); 1589 1590 /* Handle rows and column maps 1591 We directly map rows and use an SF for the columns */ 1592 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, 
&rows_idx, nc, &cols_idx)); 1593 PetscCall(MatNestGetISs(A, rows, cols)); 1594 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1595 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1596 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1597 else (void)maxnnz; 1598 1599 cumnnz = 0; 1600 for (PetscInt r = 0; r < nr; r++) { 1601 for (PetscInt c = 0; c < nc; c++) { 1602 Mat sub = mats[r][c]; 1603 const PetscInt *ridx = rows_idx[r]; 1604 const PetscInt *cidx = cols_idx[c]; 1605 PetscScalar vscale = 1.0, vshift = 0.0; 1606 PetscInt rst, size, bs; 1607 PetscSF csf; 1608 PetscBool conjugate = PETSC_FALSE, swap = PETSC_FALSE; 1609 PetscLayout cmap; 1610 PetscInt innz; 1611 1612 mumps->nest_vals_start[r * nc + c] = cumnnz; 1613 if (c == r) { 1614 PetscCall(ISGetSize(rows[r], &size)); 1615 if (!mumps->nest_convert_to_triples[r * nc + c]) { 1616 for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 1617 } 1618 PetscCall(MatGetBlockSize(sub, &bs)); 1619 Mbs += size / bs; 1620 } 1621 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1622 1623 /* Extract inner blocks if needed */ 1624 PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap)); 1625 PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 1626 1627 /* Get column layout to map off-process columns */ 1628 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1629 1630 /* Get row start to map on-process rows */ 1631 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1632 1633 /* Directly use the mumps datastructure and use C ordering for now */ 1634 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1635 1636 /* Swap the role of rows and columns indices for transposed blocks 1637 
since we need values with global final ordering */ 1638 if (swap) { 1639 cidx = rows_idx[r]; 1640 ridx = cols_idx[c]; 1641 } 1642 1643 /* Communicate column indices 1644 This could have been done with a single SF but it would have complicated the code a lot. 1645 But since we do it only once, we pay the price of setting up an SF for each block */ 1646 if (PetscDefined(USE_64BIT_INDICES)) { 1647 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1648 } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */ 1649 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1650 PetscCall(PetscIntCast(mumps->nnz, &innz)); 1651 PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1652 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1653 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1654 PetscCall(PetscSFDestroy(&csf)); 1655 1656 /* Import indices: use direct map for rows and mapped indices for columns */ 1657 if (swap) { 1658 for (PetscInt k = 0; k < mumps->nnz; k++) { 1659 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1660 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1661 } 1662 } else { 1663 for (PetscInt k = 0; k < mumps->nnz; k++) { 1664 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1665 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1666 } 1667 } 1668 1669 /* Import values to full COO */ 1670 if (conjugate) { /* conjugate the entries */ 1671 PetscScalar *v = vals + cumnnz; 1672 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]); 1673 } else if (vscale != 1.0) { 1674 PetscScalar *v = vals + cumnnz; 1675 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k]; 1676 } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 
1677 1678 /* Shift new starting point and sanity check */ 1679 cumnnz += mumps->nnz; 1680 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 1681 1682 /* Free scratch memory */ 1683 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1684 PetscCall(PetscFree(mumps->val_alloc)); 1685 mumps->val = NULL; 1686 mumps->nnz = 0; 1687 } 1688 } 1689 if (mumps->id.ICNTL(15) == 1) { 1690 if (Mbs != A->rmap->N) { 1691 PetscMPIInt rank, size; 1692 1693 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 1694 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1695 if (rank == 0) { 1696 PetscInt shift = 0; 1697 1698 PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk)); 1699 PetscCall(PetscFree(mumps->id.blkptr)); 1700 PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr)); 1701 mumps->id.blkptr[0] = 1; 1702 for (PetscInt i = 0; i < size; ++i) { 1703 for (PetscInt r = 0; r < nr; r++) { 1704 Mat sub = mats[r][r]; 1705 const PetscInt *ranges; 1706 PetscInt bs; 1707 1708 for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 1709 PetscCall(MatGetOwnershipRanges(sub, &ranges)); 1710 PetscCall(MatGetBlockSize(sub, &bs)); 1711 for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1)); 1712 shift += (ranges[i + 1] - ranges[i]) / bs; 1713 } 1714 } 1715 } 1716 } else mumps->id.ICNTL(15) = 0; 1717 } 1718 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1719 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1720 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1721 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1722 if (!chol) 
PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz; /* trailing sentinel so per-block nnz can later be computed by differencing consecutive entries */

    /* Hand the fully assembled COO arrays over to the MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    /* MAT_REUSE_MATRIX path: the nonzero pattern is unchanged, only refresh the numerical values
       of each nest block in its slice of nest_vals (slices delimited by nest_vals_start[]) */
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool   conjugate = PETSC_FALSE;
        Mat         sub       = mats[r][c];
        PetscScalar vscale = 1.0, vshift = 0.0;
        PetscInt    midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue; /* block not represented in the COO (e.g. NULL or covered elsewhere) */
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
        mumps->val = oval + mumps->nest_vals_start[midx]; /* point the converter at this block's slice */
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (conjugate) { /* apply conjugation (and scaling) in place, mirroring the MAT_INITIAL_MATRIX code path */
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
        }
      }
    }
    mumps->val = oval; /* restore the base pointer */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the MUMPS factor matrix: free all interface-owned memory, terminate the MUMPS
   instance (JOB_END) if it was ever initialized, release the duplicated communicator, and
   detach every composed method from the Mat object. */
static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree(mumps->id.blkvar));
  PetscCall(PetscFree(mumps->id.blkptr));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      /* the communicator was either MPI_Comm_dup'ed (OpenMP support path) or obtained from the PETSc comm cache */
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
  PetscCall(MatMumpsFreeInternalID(&mumps->id));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor.
*/
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* No OpenMP aggregation: each rank hands its own rows straight to MUMPS */
    if (!mumps->irhs_loc) { /* lazily build the (cached) row-index map for the local RHS entries */
      mumps->nloc_rhs = (PetscMUMPSInt)m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
    }
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    /* Several PETSc ranks are folded into one MUMPS (OMP) rank: gather their RHS pieces onto the omp master */
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); /* map omp ranks back to their PETSc ranks to find each one's row range */

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs; /* columns of rhs_loc are nloc_rhs long */
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  /* publish sizes/index map into the MUMPS structure */
  mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
  mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A x = b with the computed MUMPS factors (JOB_SOLVE), handling distributed vs. centralized
   RHS and the optional Schur-complement condensation/expansion steps. */
static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *barray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J.
Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
                                   "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
                                   &cite2));

  /* a failed factorization produces no usable factors; flag x and bail out */
  PetscCall(VecFlag(x, A->factorerrortype));
  if (A->factorerrortype) {
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
      PetscCall(VecGetArrayRead(b, &barray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector */
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) { /* only the host rank hands the RHS to MUMPS */
        PetscCall(VecGetArray(mumps->b_seq, &array));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
      }
    }
  } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
    mumps->id.ICNTL(20) = 0;
    PetscCall(VecCopy(b, x)); /* MUMPS overwrites the RHS with the solution, so solve in place inside x */
    PetscCall(VecGetArray(x, &array));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
      mumps->id.ICNTL(26) = 1;                                /* condensation phase */
    } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }

  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
  else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
    PetscCall(MatMumpsSolveSchur_Private(A));
    for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i); /* listvar_schur[] is 1-based */
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of ilsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscScalar *xarray;
    PetscCall(VecGetArray(mumps->x_seq, &xarray));
    PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray)); /* convert from MUMPS precision to PetscScalar */
    PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));

    if (mumps->ICNTL20 == 10) { // distributed RHS
      PetscCall(VecRestoreArrayRead(b, &barray));
    } else if (!mumps->myid) { // centralized RHS
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else {
    // id.rhs has the solution in mumps precision
    PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(VecRestoreArray(x, &array));
  }

  /* INFO(28) holds the number of entries in the factors; negative means the count is in millions, cf. MUMPS manual */
  PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ?
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A^T x = b by temporarily flipping ICNTL(9) (values other than 1 make MUMPS solve the
   transposed system) around a call to MatSolve_MUMPS(), then restoring the user's setting. */
static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
  const PetscMUMPSInt value = mumps->id.ICNTL(9); /* remember the caller-visible setting */

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0; /* request the transposed solve */
  PetscCall(MatSolve_MUMPS(A, b, x));
  mumps->id.ICNTL(9) = value; /* restore */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A X = B for a dense X, where B is either dense or the (virtual) transpose of a sparse
   AIJ matrix (MUMPS wants sparse RHS in compressed-column form, see MatMatTransposeSolve_MUMPS()). */
static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL;
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M, nrhsM;
  PetscScalar       *array;
  const PetscScalar *barray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save;
  PetscScalar       *sol_loc;
  void              *sol_loc_save;
  PetscCount         sol_loc_len_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc;
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));

  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
    /* reject any scaling/shift baked into the MatShell wrapper; the raw inner matrix is used below */
    PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
    /* input B is transpose of actual RHS matrix,
       because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
    PetscCall(MatTransposeGetMat(B, &Bt));
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B, &M, &nrhs));
  PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); /* nrhs*M with overflow check */
  mumps->id.nrhs = (PetscMUMPSInt)nrhs;
  mumps->id.lrhs = (PetscMUMPSInt)M;

  if (mumps->petsc_size == 1) { // handle this easy case specially and return early
    PetscScalar *aa;
    PetscInt     spnr, *ia, *ja;
    PetscBool    second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X, &array));
    if (denseB) {
      /* copy B to X; MUMPS overwrites the dense RHS with the solution */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(PetscArraycpy(array, barray, nrhsM));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
    } else { /* sparse B: hand Bt's CSR (interpreted as compressed-column of B) to MUMPS */
      PetscCall(MatSeqAIJGetArray(Bt, &aa));
      PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); /* 1-based indices for MUMPS */
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    }
    PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));

    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0) {
      if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
        second_solve = PETSC_TRUE;
        PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
        mumps->id.ICNTL(26) = 1;                                /* condensation phase, i.e, to solve id.redrhs */
      } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
    }

    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
    else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
      PetscCall(MatMumpsSolveSchur_Private(A));
      for (j = 0; j < nrhs; ++j)
        for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs); /* listvar_schur[] is 1-based */
    }

    if (!denseB) { /* sparse B, restore ia, ja */
      PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
      PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }

    // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X.
    PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(MatDenseRestoreArray(X, &array));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* parallel case: MUMPS requires rhs B to be centralized on the host! */
  PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
  sol_loc_save  = mumps->id.sol_loc;
  sol_loc_len_save = mumps->id.sol_loc_len;
  mumps->id.isol_loc = NULL; // an init state
  mumps->id.sol_loc  = NULL;
  mumps->id.sol_loc_len = 0;

  lsol_loc = mumps->id.lsol_loc;
  PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
  PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
         0, re-arrange B into desired order, which is a local operation.
      */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));

      /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhsM, &idx));
        PetscCall(MatGetOwnershipRanges(B, &rstart));
        for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
          for (j = 0; j < nrhs; j++) {
            for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
      } else { /* non-host ranks contribute nothing to the centralized RHS */
        PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
      }

      PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

      if (!mumps->myid) { /* define rhs on the host */
        PetscCall(VecGetArrayRead(b_seq, &barray));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
        PetscCall(VecRestoreArrayRead(b_seq, &barray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ *)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X, &m, NULL));
    PetscCall(MatDenseGetArrayRead(X, &barray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
    PetscCall(MatDenseRestoreArrayRead(X, &barray));

    if (!mumps->myid) { /* only the host passes the sparse RHS (diagonal block of Bt) to MUMPS */
      PetscCall(MatSeqAIJGetArray(b->A, &aa));
      PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      if (mumps->id.rhs_sparse_len) {
        PetscCall(PetscFree(mumps->id.rhs_sparse));
        mumps->id.rhs_sparse_len = 0;
      }
    }
  }

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

  /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
  PetscCall(MatDenseGetArray(X, &array));
  PetscCall(VecPlaceArray(v_mpi, array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X, &rstart));
  /* iidx: index for scatter mumps solution to PETSc X */

  PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
  PetscCall(PetscMalloc1(nlsol_loc, &idxx));
  for (i = 0; i < lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* locate the owning process of row isol_loc[i] to translate it into X's column-major global index */
    for (proc = 0; proc < mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
        iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
        m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
  PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
  PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
  PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X, &array));

  if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
    mumps->id.sol_loc_len = 0;
    PetscCall(PetscFree(mumps->id.sol_loc));
  }

  // restore old values
  mumps->id.sol_loc     = sol_loc_save;
  mumps->id.sol_loc_len = sol_loc_len_save;
  mumps->id.isol_loc    = isol_loc_save;

  PetscCall(PetscFree2(sol_loc, isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ *)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
      PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      /* centralized-rhs path only: destroy the temporary sequential rhs vector and its scatter */
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  /* flop count: a negative INFO(28) apparently encodes the count in (negated) millions -- see the -1000000 factor */
  PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A^T X = B: temporarily set ICNTL(9) = 0 (non-default, selects the transposed system in MUMPS)
   and delegate to MatMatSolve_MUMPS(); the caller's ICNTL(9) value is restored afterwards. */
static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
{
  Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
  const PetscMUMPSInt value = mumps->id.ICNTL(9);

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatMatSolve_MUMPS(A, B, X));
  mumps->id.ICNTL(9) = value;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A X = Bt^T where the right-hand sides are given as rows of the AIJ matrix Bt;
   a transpose wrapper (no data copy) is created around Bt and handed to MatMatSolve_MUMPS(). */
static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
{
  PetscBool flg;
  Mat       B;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  PetscCall(MatCreateTranspose(Bt, &B));

  PetscCall(MatMatSolve_MUMPS(A, B, X));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
{
  Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
  /* MUMPS 4.3.1 calls
ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));
  /* NOTE(review): the check tests ICNTL(13) but the message argument prints INFOG(13) -- confirm this is intended */

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/*
  Gather the triplet arrays (irn/jcn/val) from all ranks of each omp_comm onto that
  communicator's master rank; a no-op when omp_comm_size == 1.
  With MAT_INITIAL_MATRIX both indices and values are gathered (and the master's arrays
  are reallocated to hold the union); on reuse only the values are re-gathered.
  Messages are sliced into chunks of at most PETSC_MPI_INT_MAX entries because MPI
  takes a signed int count while the total nnz may exceed 2^31-1.
*/
static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscMPIInt    nreqs;
  PetscMUMPSInt *irn, *jcn;
  PetscMPIInt    count;
  PetscCount     totnnz, remain;
  const PetscInt osize = mumps->omp_comm_size;
  PetscScalar   *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
      PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
      } else {
        /* number of slices this (non-master) rank must send */
        nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
      }
      PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        totnnz = 0;

        for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
        PetscCall(PetscMalloc1(totnnz, &val));

        /* Self communication */
        PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
        PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
        PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        /* advance the cursors past the self-copied region before posting receives */
        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication */
        for (PetscMPIInt i = 1; i < osize; i++) {
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            irn += count;
            jcn += count;
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        /* non-master: send this rank's whole triplet arrays, slice by slice */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          irn += count;
          jcn += count;
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      /* MAT_REUSE_MATRIX: structure unchanged, only the numerical values move */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
    mumps->tag++; /* It is totally fine for
above send/recvs to share one mpi tag */ 2493 } 2494 PetscFunctionReturn(PETSC_SUCCESS); 2495 } 2496 2497 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info) 2498 { 2499 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2500 2501 PetscFunctionBegin; 2502 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 2503 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2504 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2505 PetscFunctionReturn(PETSC_SUCCESS); 2506 } 2507 2508 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 2509 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2510 2511 /* numerical factorization phase */ 2512 mumps->id.job = JOB_FACTNUMERIC; 2513 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2514 if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2515 } else { 2516 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2517 } 2518 2519 if (F->schur) { 2520 const PetscScalar *array; 2521 MUMPS_INT size = mumps->id.size_schur; 2522 PetscCall(MatDenseGetArrayRead(F->schur, &array)); 2523 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur)); 2524 PetscCall(MatDenseRestoreArrayRead(F->schur, &array)); 2525 } 2526 2527 PetscMUMPS_c(mumps); 2528 if (mumps->id.INFOG(1) < 0) { 2529 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 
2530 if (mumps->id.INFOG(1) == -10) { 2531 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2532 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2533 } else if (mumps->id.INFOG(1) == -13) { 2534 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2535 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2536 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2537 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2538 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2539 } else { 2540 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2541 F->factorerrortype = MAT_FACTOR_OTHER; 2542 } 2543 } 2544 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 2545 2546 F->assembled = PETSC_TRUE; 2547 2548 if (F->schur) { /* reset Schur status to unfactored */ 2549 #if defined(PETSC_HAVE_CUDA) 2550 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 2551 #endif 2552 PetscScalar *array; 2553 PetscCall(MatDenseGetArray(F->schur, &array)); 2554 PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array)); 2555 PetscCall(MatDenseRestoreArray(F->schur, &array)); 2556 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2557 mumps->id.ICNTL(19) = 2; 2558 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2559 } 2560 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2561 } 2562 2563 /* 
just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2564 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2565 2566 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2567 // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the 2568 // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for 2569 // which the solution (in SOL_loc) is available on the local processor. 2570 // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will 2571 // normally have the same contents for each of these calls. The only exception is the case of 2572 // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non 2573 // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase. 2574 if (mumps->petsc_size > 1) { 2575 PetscInt lsol_loc; 2576 PetscScalar *array; 2577 2578 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2579 if (mumps->x_seq) { 2580 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2581 PetscCall(PetscFree(mumps->id.isol_loc)); 2582 PetscCall(VecDestroy(&mumps->x_seq)); 2583 } 2584 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2585 PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc)); 2586 PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq)); 2587 PetscCall(VecGetArray(mumps->x_seq, &array)); 2588 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 2589 PetscCall(VecRestoreArray(mumps->x_seq, &array)); 2590 mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc; 2591 } 2592 PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2))); 2593 PetscFunctionReturn(PETSC_SUCCESS); 2594 } 2595 2596 /* Sets MUMPS options from the options database */ 2597 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, 
Mat A) 2598 { 2599 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2600 PetscReal cntl; 2601 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2602 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2603 PetscBool flg = PETSC_FALSE; 2604 PetscBool schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1); 2605 void *arr; 2606 2607 PetscFunctionBegin; 2608 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2609 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2610 PetscPrecision precision = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE; 2611 PetscInt nthreads = 0; 2612 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2613 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2614 PetscMUMPSInt nblk, *blkvar, *blkptr; 2615 2616 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2617 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2618 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2619 2620 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2621 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2622 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2623 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2624 if (mumps->use_petsc_omp_support) { 2625 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 2626 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2627 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2628 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2629 #else 2630 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2631 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2632 #endif 2633 } else { 2634 mumps->omp_comm = PETSC_COMM_SELF; 2635 mumps->mumps_comm = mumps->petsc_comm; 2636 mumps->is_omp_master = PETSC_TRUE; 2637 } 2638 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2639 mumps->reqs = NULL; 2640 mumps->tag = 0; 2641 2642 if (mumps->mumps_comm != MPI_COMM_NULL) { 2643 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2644 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2645 MPI_Comm comm; 2646 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2647 mumps->mumps_comm = comm; 2648 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2649 } 2650 2651 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2652 mumps->id.job = JOB_INIT; 2653 mumps->id.par = 1; /* host participates factorizaton and solve */ 2654 mumps->id.sym = mumps->sym; 2655 2656 size = mumps->id.size_schur; 2657 arr = mumps->id.schur; 2658 listvar_schur = mumps->id.listvar_schur; 2659 nblk = mumps->id.nblk; 2660 blkvar = mumps->id.blkvar; 2661 blkptr = mumps->id.blkptr; 2662 if (PetscDefined(USE_DEBUG)) { 2663 for (PetscInt i = 0; i < size; i++) 2664 PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! 
%" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1, 2665 A->rmap->N); 2666 } 2667 2668 PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL)); 2669 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]); 2670 PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar"); 2671 PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision)); 2672 2673 PetscMUMPS_c(mumps); 2674 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2675 2676 /* set PETSc-MUMPS default options - override MUMPS default */ 2677 mumps->id.ICNTL(3) = 0; 2678 mumps->id.ICNTL(4) = 0; 2679 if (mumps->petsc_size == 1) { 2680 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2681 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2682 } else { 2683 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2684 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2685 } 2686 if (nblk && blkptr) { 2687 mumps->id.ICNTL(15) = 1; 2688 mumps->id.nblk = nblk; 2689 mumps->id.blkvar = blkvar; 2690 mumps->id.blkptr = blkptr; 2691 } else mumps->id.ICNTL(15) = 0; 2692 2693 /* restore cached ICNTL and CNTL values */ 2694 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2695 for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]); 2696 2697 
PetscCall(PetscFree(mumps->ICNTL_pre)); 2698 PetscCall(PetscFree(mumps->CNTL_pre)); 2699 2700 if (schur) { 2701 mumps->id.size_schur = size; 2702 mumps->id.schur_lld = size; 2703 mumps->id.schur = arr; 2704 mumps->id.listvar_schur = listvar_schur; 2705 if (mumps->petsc_size > 1) { 2706 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2707 2708 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2709 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2710 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm)); 2711 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2712 } else { 2713 if (F->factortype == MAT_FACTOR_LU) { 2714 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2715 } else { 2716 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2717 } 2718 } 2719 mumps->id.ICNTL(26) = -1; 2720 } 2721 2722 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2723 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
2724 */ 2725 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2726 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); 2727 2728 mumps->scat_rhs = NULL; 2729 mumps->scat_sol = NULL; 2730 } 2731 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2732 if (flg) mumps->id.ICNTL(1) = icntl; 2733 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2734 if (flg) mumps->id.ICNTL(2) = icntl; 2735 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2736 if (flg) mumps->id.ICNTL(3) = icntl; 2737 2738 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2739 if (flg) mumps->id.ICNTL(4) = icntl; 2740 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2741 2742 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2743 if (flg) mumps->id.ICNTL(6) = icntl; 2744 2745 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2746 if (flg) { 2747 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2748 mumps->id.ICNTL(7) = icntl; 2749 } 2750 2751 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2752 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2753 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2754 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2755 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2756 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2757 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2758 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2759 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs; 2760 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2761 if (flg) { 2762 if (mumps->id.ICNTL(15) < 0) 
PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2763 else if (mumps->id.ICNTL(15) > 0) { 2764 const PetscInt *bsizes; 2765 PetscInt nblocks, p, *blkptr = NULL; 2766 PetscMPIInt *recvcounts, *displs, n; 2767 PetscMPIInt rank, size = 0; 2768 2769 PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes)); 2770 flg = PETSC_TRUE; 2771 for (p = 0; p < nblocks; ++p) { 2772 if (bsizes[p] > 1) break; 2773 } 2774 if (p == nblocks) flg = PETSC_FALSE; 2775 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 2776 if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1 2777 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2778 if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2779 PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs)); 2780 PetscCall(PetscMPIIntCast(nblocks, &n)); 2781 PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); 2782 for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p]; 2783 PetscCall(PetscMalloc1(displs[size] + 1, &blkptr)); 2784 PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2785 PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2786 if (rank == 0) { 2787 blkptr[0] = 1; 2788 for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p]; 2789 PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr)); 2790 } 2791 PetscCall(PetscFree2(recvcounts, displs)); 2792 PetscCall(PetscFree(blkptr)); 2793 } 2794 } 2795 } 2796 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), 
&mumps->id.ICNTL(19), NULL)); 2797 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2798 PetscCall(MatDestroy(&F->schur)); 2799 PetscCall(MatMumpsResetSchur_Private(mumps)); 2800 } 2801 2802 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2803 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2804 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2805 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2806 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2807 In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 2808 */ 2809 mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */ 2810 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI) 2811 mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */ 2812 #endif 2813 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2814 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10", (int)mumps->ICNTL20); 2815 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2816 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2817 #endif 2818 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2819 2820 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2821 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2822 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2823 if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 2824 2825 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2826 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2827 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2828 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2829 
PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2830 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2831 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2832 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2833 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2834 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2835 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2836 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL)); 2837 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2838 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL)); 2839 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", 
mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL)); 2840 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2841 2842 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg)); 2843 if (flg) ID_CNTL_SET(mumps->id, 1, cntl); 2844 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg)); 2845 if (flg) ID_CNTL_SET(mumps->id, 2, cntl); 2846 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg)); 2847 if (flg) ID_CNTL_SET(mumps->id, 3, cntl); 2848 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg)); 2849 if (flg) ID_CNTL_SET(mumps->id, 4, cntl); 2850 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg)); 2851 if (flg) ID_CNTL_SET(mumps->id, 5, cntl); 2852 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg)); 2853 if (flg) ID_CNTL_SET(mumps->id, 7, cntl); 2854 2855 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2856 2857 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2858 if (ninfo) { 2859 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2860 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2861 mumps->ninfo = ninfo; 2862 for (i = 0; i < ninfo; i++) 
{ 2863 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2864 mumps->info[i] = info[i]; 2865 } 2866 } 2867 PetscOptionsEnd(); 2868 PetscFunctionReturn(PETSC_SUCCESS); 2869 } 2870 2871 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2872 { 2873 PetscFunctionBegin; 2874 if (mumps->id.INFOG(1) < 0) { 2875 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2876 if (mumps->id.INFOG(1) == -6) { 2877 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2878 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2879 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2880 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2881 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2882 } else { 2883 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2884 F->factorerrortype = MAT_FACTOR_OTHER; 2885 } 2886 } 2887 if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 2888 PetscFunctionReturn(PETSC_SUCCESS); 2889 } 2890 2891 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2892 { 2893 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2894 Vec b; 2895 const PetscInt M = A->rmap->N; 2896 2897 PetscFunctionBegin; 2898 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2899 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2900 PetscFunctionReturn(PETSC_SUCCESS); 2901 } 2902 2903 /* Set MUMPS options from the options database */ 2904 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2905 2906 
  /* Convert the PETSc matrix to MUMPS triples (irn/jcn/val), then optionally gather them on rank 0 */
  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      /* numerical values are only needed at analysis when scaling ICNTL(6) in 2..6 is requested */
      if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
      if (r && mumps->id.ICNTL(7) == 7) { /* user supplied an ordering: switch ICNTL(7) to user-given permutation */
        mumps->id.ICNTL(7) = 1;
        if (!mumps->myid) {
          const PetscInt *idx;
          PetscInt        i;

          PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
          PetscCall(ISGetIndices(r, &idx));
          for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
          PetscCall(ISRestoreIndices(r, &idx));
        }
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps); /* run the MUMPS analysis job */
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  /* install the MUMPS-backed numeric-factor and solve operations on F */
  F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
  F->ops->solve             = MatSolve_MUMPS;
  F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
  F->ops->matsolve          = MatMatSolve_MUMPS;
  F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
  F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Note the PETSc r and c permutations are ignored */
/* Symbolic LU factorization (MUMPS analysis phase) for BAIJ matrices; same structure as the AIJ
   variant except that no user ordering is forwarded to MUMPS. */
static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
{
  Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps);
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
  F->ops->solve             = MatSolve_MUMPS;
  F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
  F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Note the PETSc r permutation and factor info are ignored */
/* Symbolic Cholesky factorization (MUMPS analysis phase) for symmetric matrices. */
static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
{
  Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      /* numerical values are only needed at analysis when scaling ICNTL(6) in 2..6 is requested */
      if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps); /* run the MUMPS analysis job */
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS; /* the factored matrix is symmetric, so solve == solvetranspose */
  F->ops->matsolve              = MatMatSolve_MUMPS;
  F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
  F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = NULL; /* inertia is only meaningful for real symmetric matrices */
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ASCII viewer for a MUMPS factor: with -ksp_view ::ascii_info_detail it dumps the ICNTL/CNTL
   control parameters and the per-process and global INFO/INFOG/RINFO/RINFOG statistics. */
static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
{
  PetscBool         isascii;
  PetscViewerFormat format;
  Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* control parameters as currently set on the MUMPS id structure */
        PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym));
        PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11)));
        if (mumps->id.ICNTL(11) > 0) { /* error-analysis statistics are only valid when ICNTL(11) was enabled */
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15)));
        /* ICNTL(16-17) not used */
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)ID_CNTL_GET(mumps->id, 7)));

        /* information local to each processor; synchronized printing keeps the per-rank lines ordered */
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n"));
        PetscCall(PetscViewerASCIIPushSynchronized(viewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
        PetscCall(PetscViewerFlush(viewer));

        /* user-requested INFO entries from -mat_mumps_view_info */
        if (mumps->ninfo && mumps->ninfo <= 80) {
          PetscInt i;
          for (i = 0; i < mumps->ninfo; i++) {
            PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
            PetscCall(PetscViewerFlush(viewer));
          }
        }
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));

      if (mumps->myid == 0) { /* information from the host */
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report factor statistics through the generic MatInfo interface; only the nonzero counts are
   available from MUMPS (INFOG(20)), all other fields are reported as zero. */
static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  info->block_size = 1.0;
  /* a negative INFOG(20) encodes the entry count in millions, hence the -1000000 scaling
     -- NOTE(review): per MUMPS convention for large counts, confirm against the MUMPS users' guide */
  info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Register the index set `is` as the Schur complement block: allocates the dense Schur matrix F->schur
   and records the (1-based) Schur variable list on the MUMPS id structure. */
static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  // don't allocate mumps->id.schur[] now as its precision is yet to know
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); /* sym==1 means SPD in MUMPS */
  /* MUMPS expects Fortran style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
  else mumps->ICNTL26 = -1; /* icntl not allocated yet: stash the value until PetscMUMPS_c() initializes id */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the MUMPS Schur complement into a new sequential dense matrix *S, expanding
   triangular/transposed storage (selected by ICNTL(19) and the symmetry flag) into a full matrix. */
static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;
  PetscInt     i, j, N = mumps->id.size_schur;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); /* transpose into column-major */
      }
    } else { /* stored by columns */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    }
  } else {                          /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); /* mirror to fill the upper triangle */
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set an ICNTL value; before the first PetscMUMPS_c() call (id.job == JOB_NULL) the (icntl, ival)
   pair is cached in ICNTL_pre -- layout: [count, icntl_0, ival_0, icntl_1, ival_1, ...]. */
static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) {                             /* not already cached */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Get an ICNTL value; reads the ICNTL_pre cache (returning 0 if not cached) until PetscMUMPS_c()
   has been called, afterwards reads the live id.ICNTL array. */
static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
    *ival = 0; /* default when the requested icntl was never cached */
    for (i = 0; i < nICNTL_pre; ++i) {
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
    }
  } else *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
. icntl - index of MUMPS parameter array `ICNTL()`
- ival  - value of MUMPS `ICNTL(icntl)`

  Options Database Key:
. -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`

  Level: beginner

  Note:
  Ignored if MUMPS is not installed or `F` is not a MUMPS matrix

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveInt(F, ival, 3);
  /* only the ICNTL entries the PETSc interface knows about are accepted */
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

Input Parameters: 3392 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3393 - icntl - index of MUMPS parameter array ICNTL() 3394 3395 Output Parameter: 3396 . ival - value of MUMPS ICNTL(icntl) 3397 3398 Level: beginner 3399 3400 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3401 @*/ 3402 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 3403 { 3404 PetscFunctionBegin; 3405 PetscValidType(F, 1); 3406 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3407 PetscValidLogicalCollectiveInt(F, icntl, 2); 3408 PetscAssertPointer(ival, 3); 3409 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 3410 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3411 PetscFunctionReturn(PETSC_SUCCESS); 3412 } 3413 3414 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 3415 { 3416 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3417 3418 PetscFunctionBegin; 3419 if (mumps->id.job == JOB_NULL) { 3420 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 3421 for (i = 0; i < nCNTL_pre; ++i) 3422 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 3423 if (i == nCNTL_pre) { 3424 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 3425 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 3426 mumps->CNTL_pre[0]++; 3427 } 3428 mumps->CNTL_pre[1 + 2 * i] = icntl; 3429 mumps->CNTL_pre[2 + 2 * i] = val; 3430 } else ID_CNTL_SET(mumps->id, icntl, val); 3431 PetscFunctionReturn(PETSC_SUCCESS); 3432 } 3433 3434 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 3435 { 3436 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3437 3438 PetscFunctionBegin; 3439 if (mumps->id.job == JOB_NULL) { 3440 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 3441 *val = 0.0; 3442 for (i = 0; i < nCNTL_pre; ++i) { 3443 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 3444 } 3445 } else *val = ID_CNTL_GET(mumps->id, icntl); 3446 PetscFunctionReturn(PETSC_SUCCESS); 3447 } 3448 3449 /*@ 3450 MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3451 3452 Logically Collective 3453 3454 Input Parameters: 3455 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3456 . icntl - index of MUMPS parameter array `CNTL()` 3457 - val - value of MUMPS `CNTL(icntl)` 3458 3459 Options Database Key: 3460 . 
-mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 3461 3462 Level: beginner 3463 3464 Note: 3465 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 3466 3467 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3468 @*/ 3469 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 3470 { 3471 PetscFunctionBegin; 3472 PetscValidType(F, 1); 3473 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3474 PetscValidLogicalCollectiveInt(F, icntl, 2); 3475 PetscValidLogicalCollectiveReal(F, val, 3); 3476 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3477 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 3478 PetscFunctionReturn(PETSC_SUCCESS); 3479 } 3480 3481 /*@ 3482 MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3483 3484 Logically Collective 3485 3486 Input Parameters: 3487 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3488 - icntl - index of MUMPS parameter array CNTL() 3489 3490 Output Parameter: 3491 . 
val - value of MUMPS CNTL(icntl) 3492 3493 Level: beginner 3494 3495 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3496 @*/ 3497 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 3498 { 3499 PetscFunctionBegin; 3500 PetscValidType(F, 1); 3501 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3502 PetscValidLogicalCollectiveInt(F, icntl, 2); 3503 PetscAssertPointer(val, 3); 3504 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3505 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3506 PetscFunctionReturn(PETSC_SUCCESS); 3507 } 3508 3509 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 3510 { 3511 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3512 3513 PetscFunctionBegin; 3514 *info = mumps->id.INFO(icntl); 3515 PetscFunctionReturn(PETSC_SUCCESS); 3516 } 3517 3518 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 3519 { 3520 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3521 3522 PetscFunctionBegin; 3523 *infog = mumps->id.INFOG(icntl); 3524 PetscFunctionReturn(PETSC_SUCCESS); 3525 } 3526 3527 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 3528 { 3529 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3530 3531 PetscFunctionBegin; 3532 *rinfo = ID_RINFO_GET(mumps->id, icntl); 3533 PetscFunctionReturn(PETSC_SUCCESS); 3534 } 3535 3536 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 3537 { 3538 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3539 3540 PetscFunctionBegin; 3541 *rinfog = ID_RINFOG_GET(mumps->id, icntl); 3542 PetscFunctionReturn(PETSC_SUCCESS); 3543 } 3544 3545 static 
PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 3546 { 3547 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3548 3549 PetscFunctionBegin; 3550 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 3551 *size = 0; 3552 *array = NULL; 3553 if (!mumps->myid) { 3554 *size = mumps->id.INFOG(28); 3555 PetscCall(PetscMalloc1(*size, array)); 3556 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 3557 } 3558 PetscFunctionReturn(PETSC_SUCCESS); 3559 } 3560 3561 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 3562 { 3563 Mat Bt = NULL, Btseq = NULL; 3564 PetscBool flg; 3565 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3566 PetscScalar *aa; 3567 PetscInt spnr, *ia, *ja, M, nrhs; 3568 3569 PetscFunctionBegin; 3570 PetscAssertPointer(spRHS, 2); 3571 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 3572 PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 3573 PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 3574 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 3575 3576 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 3577 3578 if (mumps->petsc_size > 1) { 3579 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 3580 Btseq = b->A; 3581 } else { 3582 Btseq = Bt; 3583 } 3584 3585 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 3586 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 3587 PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs)); 3588 mumps->id.rhs = NULL; 3589 3590 if (!mumps->myid) { 3591 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 3592 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const 
PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3593 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3594 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3595 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 3596 } else { 3597 mumps->id.irhs_ptr = NULL; 3598 mumps->id.irhs_sparse = NULL; 3599 mumps->id.nz_rhs = 0; 3600 if (mumps->id.rhs_sparse_len) { 3601 PetscCall(PetscFree(mumps->id.rhs_sparse)); 3602 mumps->id.rhs_sparse_len = 0; 3603 } 3604 } 3605 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3606 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3607 3608 /* solve phase */ 3609 mumps->id.job = JOB_SOLVE; 3610 PetscMUMPS_c(mumps); 3611 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 3612 3613 if (!mumps->myid) { 3614 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3615 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3616 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3617 } 3618 PetscFunctionReturn(PETSC_SUCCESS); 3619 } 3620 3621 /*@ 3622 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc> 3623 3624 Logically Collective 3625 3626 Input Parameter: 3627 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3628 3629 Output Parameter: 3630 . 
spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3631 3632 Level: beginner 3633 3634 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3635 @*/ 3636 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3637 { 3638 PetscFunctionBegin; 3639 PetscValidType(F, 1); 3640 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3641 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3642 PetscFunctionReturn(PETSC_SUCCESS); 3643 } 3644 3645 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3646 { 3647 Mat spRHS; 3648 3649 PetscFunctionBegin; 3650 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3651 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3652 PetscCall(MatDestroy(&spRHS)); 3653 PetscFunctionReturn(PETSC_SUCCESS); 3654 } 3655 3656 /*@ 3657 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc> 3658 3659 Logically Collective 3660 3661 Input Parameter: 3662 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3663 3664 Output Parameter: 3665 . 
spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3666 3667 Level: beginner 3668 3669 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3670 @*/ 3671 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3672 { 3673 PetscBool flg; 3674 3675 PetscFunctionBegin; 3676 PetscValidType(F, 1); 3677 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3678 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3679 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3680 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3681 PetscFunctionReturn(PETSC_SUCCESS); 3682 } 3683 3684 static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3685 { 3686 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3687 3688 PetscFunctionBegin; 3689 if (nblk) { 3690 PetscAssertPointer(blkptr, 4); 3691 PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk)); 3692 PetscCall(PetscFree(mumps->id.blkptr)); 3693 PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr)); 3694 for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i)); 3695 // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15). 3696 // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS(). 
3697 // mumps->id.ICNTL(15) = 1; 3698 if (blkvar) { 3699 PetscCall(PetscFree(mumps->id.blkvar)); 3700 PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar)); 3701 for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i)); 3702 } 3703 } else { 3704 PetscCall(PetscFree(mumps->id.blkptr)); 3705 PetscCall(PetscFree(mumps->id.blkvar)); 3706 // mumps->id.ICNTL(15) = 0; 3707 mumps->id.nblk = 0; 3708 } 3709 PetscFunctionReturn(PETSC_SUCCESS); 3710 } 3711 3712 /*@ 3713 MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1` 3714 3715 Not collective, only relevant on the first process of the MPI communicator 3716 3717 Input Parameters: 3718 + F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3719 . nblk - the number of blocks 3720 . blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk` 3721 - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks 3722 3723 Level: advanced 3724 3725 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()` 3726 @*/ 3727 PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3728 { 3729 PetscFunctionBegin; 3730 PetscValidType(F, 1); 3731 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3732 PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr)); 3733 PetscFunctionReturn(PETSC_SUCCESS); 3734 } 3735 3736 /*@ 3737 MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc> 3738 3739 Logically Collective 3740 3741 Input Parameters: 3742 + F - the factored 
matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3743 - icntl - index of MUMPS parameter array INFO() 3744 3745 Output Parameter: 3746 . ival - value of MUMPS INFO(icntl) 3747 3748 Level: beginner 3749 3750 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3751 @*/ 3752 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3753 { 3754 PetscFunctionBegin; 3755 PetscValidType(F, 1); 3756 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3757 PetscAssertPointer(ival, 3); 3758 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3759 PetscFunctionReturn(PETSC_SUCCESS); 3760 } 3761 3762 /*@ 3763 MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc> 3764 3765 Logically Collective 3766 3767 Input Parameters: 3768 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3769 - icntl - index of MUMPS parameter array INFOG() 3770 3771 Output Parameter: 3772 . 
ival - value of MUMPS INFOG(icntl) 3773 3774 Level: beginner 3775 3776 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3777 @*/ 3778 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3779 { 3780 PetscFunctionBegin; 3781 PetscValidType(F, 1); 3782 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3783 PetscAssertPointer(ival, 3); 3784 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3785 PetscFunctionReturn(PETSC_SUCCESS); 3786 } 3787 3788 /*@ 3789 MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc> 3790 3791 Logically Collective 3792 3793 Input Parameters: 3794 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3795 - icntl - index of MUMPS parameter array RINFO() 3796 3797 Output Parameter: 3798 . 
val - value of MUMPS RINFO(icntl) 3799 3800 Level: beginner 3801 3802 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3803 @*/ 3804 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3805 { 3806 PetscFunctionBegin; 3807 PetscValidType(F, 1); 3808 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3809 PetscAssertPointer(val, 3); 3810 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3811 PetscFunctionReturn(PETSC_SUCCESS); 3812 } 3813 3814 /*@ 3815 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc> 3816 3817 Logically Collective 3818 3819 Input Parameters: 3820 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3821 - icntl - index of MUMPS parameter array RINFOG() 3822 3823 Output Parameter: 3824 . 
val - value of MUMPS RINFOG(icntl) 3825 3826 Level: beginner 3827 3828 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3829 @*/ 3830 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3831 { 3832 PetscFunctionBegin; 3833 PetscValidType(F, 1); 3834 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3835 PetscAssertPointer(val, 3); 3836 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3837 PetscFunctionReturn(PETSC_SUCCESS); 3838 } 3839 3840 /*@ 3841 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc> 3842 3843 Logically Collective 3844 3845 Input Parameter: 3846 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3847 3848 Output Parameters: 3849 + size - local size of the array. The size of the array is non-zero only on MPI rank 0 3850 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3851 for freeing this array. 
3852 3853 Level: beginner 3854 3855 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3856 @*/ 3857 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3858 { 3859 PetscFunctionBegin; 3860 PetscValidType(F, 1); 3861 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3862 PetscAssertPointer(size, 2); 3863 PetscAssertPointer(array, 3); 3864 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3865 PetscFunctionReturn(PETSC_SUCCESS); 3866 } 3867 3868 /*MC 3869 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3870 MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc> 3871 3872 Works with `MATAIJ` and `MATSBAIJ` matrices 3873 3874 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3875 3876 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3877 See details below. 3878 3879 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3880 3881 Options Database Keys: 3882 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3883 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 3884 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3885 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3886 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3887 . 
-mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3888 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3889 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3890 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3891 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3892 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3893 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3894 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3895 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3896 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3897 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3898 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3899 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3900 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3901 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3902 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3903 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 3904 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3905 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3906 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3907 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3908 . 
-mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3909 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3910 . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB) 3911 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3912 . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism 3913 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3914 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3915 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3916 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3917 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3918 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3919 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3920 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3921 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3922 3923 Level: beginner 3924 3925 Notes: 3926 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will 3927 error if the matrix is Hermitian. 3928 3929 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3930 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 
3931 3932 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3933 the failure with 3934 .vb 3935 KSPGetPC(ksp,&pc); 3936 PCFactorGetMatrix(pc,&mat); 3937 MatMumpsGetInfo(mat,....); 3938 MatMumpsGetInfog(mat,....); etc. 3939 .ve 3940 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3941 3942 MUMPS provides 64-bit integer support in two build modes: 3943 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3944 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3945 3946 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3947 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3948 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3949 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3950 3951 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3952 3953 Two modes to run MUMPS/PETSc with OpenMP 3954 .vb 3955 Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3956 threads per rank, then you may use "export `OMP_NUM_THREADS` = 16 && mpirun -n 4 ./test". 3957 .ve 3958 3959 .vb 3960 `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. 
For example, 3961 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3962 .ve 3963 3964 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3965 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3966 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3967 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3968 (PETSc will automatically try to utilized a threaded BLAS if `--with-openmp` is provided). 3969 3970 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3971 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3972 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3973 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 3974 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3975 In a multi-socket compute node, MPI rank mapping is an issue. 
Still use the above example and suppose your compute node has two sockets, 3976 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3977 MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3978 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3979 problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding. 3980 For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and 3981 examine the mapping result. 3982 3983 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3984 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3985 calls `omp_set_num_threads`(m) internally before calling MUMPS. 
3986 3987 See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 3988 3989 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3990 M*/ 3991 3992 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 3993 { 3994 PetscFunctionBegin; 3995 *type = MATSOLVERMUMPS; 3996 PetscFunctionReturn(PETSC_SUCCESS); 3997 } 3998 3999 /* MatGetFactor for Seq and MPI AIJ matrices */ 4000 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 4001 { 4002 Mat B; 4003 Mat_MUMPS *mumps; 4004 PetscBool isSeqAIJ, isDiag, isDense; 4005 PetscMPIInt size; 4006 4007 PetscFunctionBegin; 4008 #if defined(PETSC_USE_COMPLEX) 4009 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4010 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 4011 *F = NULL; 4012 PetscFunctionReturn(PETSC_SUCCESS); 4013 } 4014 #endif 4015 /* Create the factorization matrix */ 4016 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 4017 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 4018 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 4019 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4020 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4021 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4022 PetscCall(MatSetUp(B)); 4023 4024 PetscCall(PetscNew(&mumps)); 4025 4026 B->ops->view = MatView_MUMPS; 4027 B->ops->getinfo = MatGetInfo_MUMPS; 4028 4029 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4030 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4031 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4032 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4033 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4034 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4035 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4036 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4037 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4038 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4039 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", 
MatMumpsGetRinfog_MUMPS)); 4040 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4041 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4042 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4043 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4044 4045 if (ftype == MAT_FACTOR_LU) { 4046 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4047 B->factortype = MAT_FACTOR_LU; 4048 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 4049 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 4050 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4051 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 4052 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4053 mumps->sym = 0; 4054 } else { 4055 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4056 B->factortype = MAT_FACTOR_CHOLESKY; 4057 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 4058 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 4059 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4060 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 4061 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4062 #if defined(PETSC_USE_COMPLEX) 4063 mumps->sym = 2; 4064 #else 4065 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4066 else mumps->sym = 2; 4067 #endif 4068 } 4069 4070 /* set solvertype */ 4071 PetscCall(PetscFree(B->solvertype)); 4072 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4073 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4074 if 
(size == 1) { 4075 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4076 B->canuseordering = PETSC_TRUE; 4077 } 4078 B->ops->destroy = MatDestroy_MUMPS; 4079 B->data = (void *)mumps; 4080 4081 *F = B; 4082 mumps->id.job = JOB_NULL; 4083 mumps->ICNTL_pre = NULL; 4084 mumps->CNTL_pre = NULL; 4085 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4086 PetscFunctionReturn(PETSC_SUCCESS); 4087 } 4088 4089 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 4090 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 4091 { 4092 Mat B; 4093 Mat_MUMPS *mumps; 4094 PetscBool isSeqSBAIJ; 4095 PetscMPIInt size; 4096 4097 PetscFunctionBegin; 4098 #if defined(PETSC_USE_COMPLEX) 4099 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4100 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 4101 *F = NULL; 4102 PetscFunctionReturn(PETSC_SUCCESS); 4103 } 4104 #endif 4105 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4106 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4107 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4108 PetscCall(MatSetUp(B)); 4109 4110 PetscCall(PetscNew(&mumps)); 4111 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 4112 if (isSeqSBAIJ) { 4113 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 4114 } else { 4115 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 4116 } 4117 4118 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4119 B->ops->view = MatView_MUMPS; 4120 B->ops->getinfo = MatGetInfo_MUMPS; 4121 4122 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4123 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", 
MatFactorSetSchurIS_MUMPS)); 4124 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4125 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4126 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4127 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4128 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4129 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4130 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4131 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4132 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4133 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4134 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4135 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4136 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4137 4138 B->factortype = MAT_FACTOR_CHOLESKY; 4139 #if defined(PETSC_USE_COMPLEX) 4140 mumps->sym = 2; 4141 #else 4142 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4143 else mumps->sym = 2; 4144 #endif 4145 4146 /* set solvertype */ 4147 PetscCall(PetscFree(B->solvertype)); 4148 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4149 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4150 if (size == 1) { 4151 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into 
symbolic factorization */ 4152 B->canuseordering = PETSC_TRUE; 4153 } 4154 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4155 B->ops->destroy = MatDestroy_MUMPS; 4156 B->data = (void *)mumps; 4157 4158 *F = B; 4159 mumps->id.job = JOB_NULL; 4160 mumps->ICNTL_pre = NULL; 4161 mumps->CNTL_pre = NULL; 4162 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4163 PetscFunctionReturn(PETSC_SUCCESS); 4164 } 4165 4166 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 4167 { 4168 Mat B; 4169 Mat_MUMPS *mumps; 4170 PetscBool isSeqBAIJ; 4171 PetscMPIInt size; 4172 4173 PetscFunctionBegin; 4174 /* Create the factorization matrix */ 4175 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 4176 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4177 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4178 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4179 PetscCall(MatSetUp(B)); 4180 4181 PetscCall(PetscNew(&mumps)); 4182 PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 4183 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 4184 B->factortype = MAT_FACTOR_LU; 4185 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 4186 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 4187 mumps->sym = 0; 4188 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4189 4190 B->ops->view = MatView_MUMPS; 4191 B->ops->getinfo = MatGetInfo_MUMPS; 4192 4193 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4195 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4196 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4197 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4198 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4199 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4200 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4201 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4202 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4203 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4204 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4205 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4206 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4207 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4208 4209 /* set solvertype */ 4210 PetscCall(PetscFree(B->solvertype)); 4211 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4212 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4213 if (size == 1) { 4214 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4215 B->canuseordering = PETSC_TRUE; 4216 } 4217 B->ops->destroy = MatDestroy_MUMPS; 4218 B->data = (void *)mumps; 4219 4220 *F = B; 4221 mumps->id.job = JOB_NULL; 4222 mumps->ICNTL_pre = NULL; 4223 mumps->CNTL_pre = NULL; 4224 mumps->matstruc = 
DIFFERENT_NONZERO_PATTERN; 4225 PetscFunctionReturn(PETSC_SUCCESS); 4226 } 4227 4228 /* MatGetFactor for Seq and MPI SELL matrices */ 4229 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 4230 { 4231 Mat B; 4232 Mat_MUMPS *mumps; 4233 PetscBool isSeqSELL; 4234 PetscMPIInt size; 4235 4236 PetscFunctionBegin; 4237 /* Create the factorization matrix */ 4238 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 4239 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4240 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4241 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4242 PetscCall(MatSetUp(B)); 4243 4244 PetscCall(PetscNew(&mumps)); 4245 4246 B->ops->view = MatView_MUMPS; 4247 B->ops->getinfo = MatGetInfo_MUMPS; 4248 4249 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4250 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4251 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4252 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4253 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4254 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4255 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4256 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4257 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4258 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4259 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4260 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4261 4262 PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4263 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4264 B->factortype = MAT_FACTOR_LU; 4265 PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4266 mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 4267 mumps->sym = 0; 4268 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4269 4270 /* set solvertype */ 4271 PetscCall(PetscFree(B->solvertype)); 4272 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4273 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4274 if (size == 1) { 4275 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4276 B->canuseordering = PETSC_TRUE; 4277 } 4278 B->ops->destroy = MatDestroy_MUMPS; 4279 B->data = (void *)mumps; 4280 4281 *F = B; 4282 mumps->id.job = JOB_NULL; 4283 mumps->ICNTL_pre = NULL; 4284 mumps->CNTL_pre = NULL; 4285 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4286 PetscFunctionReturn(PETSC_SUCCESS); 4287 } 4288 4289 /* MatGetFactor for MATNEST matrices */ 4290 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 4291 { 4292 Mat B, **mats; 4293 Mat_MUMPS *mumps; 4294 PetscInt nr, nc; 4295 PetscMPIInt size; 4296 PetscBool flg = PETSC_TRUE; 4297 4298 PetscFunctionBegin; 4299 #if defined(PETSC_USE_COMPLEX) 4300 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4301 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 4302 *F = NULL; 4303 PetscFunctionReturn(PETSC_SUCCESS); 4304 } 4305 #endif 4306 4307 /* Return if some condition is not satisfied */ 4308 *F = NULL; 4309 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 4310 if (ftype == MAT_FACTOR_CHOLESKY) { 4311 IS *rows, *cols; 4312 PetscInt *m, *M; 4313 4314 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 4315 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 4316 PetscCall(MatNestGetISs(A, rows, cols)); 4317 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 4318 if (!flg) { 4319 PetscCall(PetscFree2(rows, cols)); 4320 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 4321 PetscFunctionReturn(PETSC_SUCCESS); 4322 } 4323 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 4324 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 4325 for (PetscInt r = 0; flg && r < nr; r++) 4326 for (PetscInt k = r + 1; flg && k < nr; k++) 4327 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 4328 PetscCall(PetscFree2(m, M)); 4329 PetscCall(PetscFree2(rows, cols)); 4330 if (!flg) { 4331 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. 
Use MAT_FACTOR_LU.\n")); 4332 PetscFunctionReturn(PETSC_SUCCESS); 4333 } 4334 } 4335 4336 for (PetscInt r = 0; r < nr; r++) { 4337 for (PetscInt c = 0; c < nc; c++) { 4338 Mat sub = mats[r][c]; 4339 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 4340 4341 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 4342 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 4343 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 4344 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 4345 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 4346 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 4347 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 4348 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 4349 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 4350 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 4351 if (ftype == MAT_FACTOR_CHOLESKY) { 4352 if (r == c) { 4353 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) { 4354 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 4355 flg = PETSC_FALSE; 4356 } 4357 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 4358 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 4359 flg = PETSC_FALSE; 4360 } 4361 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 4362 PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 4363 flg = PETSC_FALSE; 4364 } 4365 } 4366 
} 4367 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 4368 4369 /* Create the factorization matrix */ 4370 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4371 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4372 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4373 PetscCall(MatSetUp(B)); 4374 4375 PetscCall(PetscNew(&mumps)); 4376 4377 B->ops->view = MatView_MUMPS; 4378 B->ops->getinfo = MatGetInfo_MUMPS; 4379 4380 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4381 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4382 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4383 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4384 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4385 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4386 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4387 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4388 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4389 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4390 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4391 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4392 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4393 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4394 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4395 4396 if (ftype == MAT_FACTOR_LU) { 4397 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4398 B->factortype = MAT_FACTOR_LU; 4399 mumps->sym = 0; 4400 } else { 4401 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4402 B->factortype = MAT_FACTOR_CHOLESKY; 4403 #if defined(PETSC_USE_COMPLEX) 4404 mumps->sym = 2; 4405 #else 4406 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4407 else mumps->sym = 2; 4408 #endif 4409 } 4410 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 4411 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 4412 4413 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4414 if (size == 1) { 4415 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4416 B->canuseordering = PETSC_TRUE; 4417 } 4418 4419 /* set solvertype */ 4420 PetscCall(PetscFree(B->solvertype)); 4421 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4422 B->ops->destroy = MatDestroy_MUMPS; 4423 B->data = (void *)mumps; 4424 4425 *F = B; 4426 mumps->id.job = JOB_NULL; 4427 mumps->ICNTL_pre = NULL; 4428 mumps->CNTL_pre = NULL; 4429 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4430 PetscFunctionReturn(PETSC_SUCCESS); 4431 } 4432 4433 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 4434 { 4435 PetscFunctionBegin; 4436 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4437 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4438 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 4439 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, 
MatGetFactor_baij_mumps)); 4440 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 4441 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4442 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4443 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 4444 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 4445 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 4446 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 4447 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4448 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4449 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4450 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4451 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4452 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4453 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 4454 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 4455 PetscFunctionReturn(PETSC_SUCCESS); 4456 } 4457