1 /* 2 Provides an interface to the MUMPS sparse solver 3 */ 4 #include <petscpkg_version.h> 5 #include <petscsf.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 11 12 EXTERN_C_BEGIN 13 #if defined(PETSC_USE_COMPLEX) 14 #if defined(PETSC_USE_REAL_SINGLE) 15 #include <cmumps_c.h> 16 #else 17 #include <zmumps_c.h> 18 #endif 19 #else 20 #if defined(PETSC_USE_REAL_SINGLE) 21 #include <smumps_c.h> 22 #else 23 #include <dmumps_c.h> 24 #endif 25 #endif 26 EXTERN_C_END 27 #define JOB_INIT -1 28 #define JOB_NULL 0 29 #define JOB_FACTSYMBOLIC 1 30 #define JOB_FACTNUMERIC 2 31 #define JOB_SOLVE 3 32 #define JOB_END -2 33 34 /* calls to MUMPS */ 35 #if defined(PETSC_USE_COMPLEX) 36 #if defined(PETSC_USE_REAL_SINGLE) 37 #define MUMPS_c cmumps_c 38 #else 39 #define MUMPS_c zmumps_c 40 #endif 41 #else 42 #if defined(PETSC_USE_REAL_SINGLE) 43 #define MUMPS_c smumps_c 44 #else 45 #define MUMPS_c dmumps_c 46 #endif 47 #endif 48 49 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 50 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 51 naming convention in PetscMPIInt, PetscBLASInt etc. 
52 */ 53 typedef MUMPS_INT PetscMUMPSInt; 54 55 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 56 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 57 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 58 #endif 59 #else 60 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 61 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 62 #endif 63 #endif 64 65 #define MPIU_MUMPSINT MPI_INT 66 #define PETSC_MUMPS_INT_MAX 2147483647 67 #define PETSC_MUMPS_INT_MIN -2147483648 68 69 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 70 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 71 { 72 PetscFunctionBegin; 73 #if PetscDefined(USE_64BIT_INDICES) 74 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 75 #endif 76 *b = (PetscMUMPSInt)(a); 77 PetscFunctionReturn(PETSC_SUCCESS); 78 } 79 80 /* Put these utility routines here since they are only used in this file */ 81 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 82 { 83 PetscInt myval; 84 PetscBool myset; 85 86 PetscFunctionBegin; 87 /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 88 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 89 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 90 if (set) *set = myset; 91 PetscFunctionReturn(PETSC_SUCCESS); 92 } 93 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 94 95 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 96 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 97 #define PetscMUMPS_c(mumps) \ 98 do { \ 99 if (mumps->use_petsc_omp_support) { \ 100 if (mumps->is_omp_master) { \ 101 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 102 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 103 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 104 PetscCall(PetscFPTrapPop()); \ 105 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 106 } \ 107 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 108 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 109 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 110 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 111 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. 
\ 112 */ \ 113 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 114 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 115 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 116 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 117 } else { \ 118 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 119 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 120 PetscCall(PetscFPTrapPop()); \ 121 } \ 122 } while (0) 123 #else 124 #define PetscMUMPS_c(mumps) \ 125 do { \ 126 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 127 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 128 PetscCall(PetscFPTrapPop()); \ 129 } while (0) 130 #endif 131 132 /* declare MumpsScalar */ 133 #if defined(PETSC_USE_COMPLEX) 134 #if defined(PETSC_USE_REAL_SINGLE) 135 #define MumpsScalar mumps_complex 136 #else 137 #define MumpsScalar mumps_double_complex 138 #endif 139 #else 140 #define MumpsScalar PetscScalar 141 #endif 142 143 /* macros s.t. indices match MUMPS documentation */ 144 #define ICNTL(I) icntl[(I) - 1] 145 #define CNTL(I) cntl[(I) - 1] 146 #define INFOG(I) infog[(I) - 1] 147 #define INFO(I) info[(I) - 1] 148 #define RINFOG(I) rinfog[(I) - 1] 149 #define RINFO(I) rinfo[(I) - 1] 150 151 typedef struct Mat_MUMPS Mat_MUMPS; 152 struct Mat_MUMPS { 153 #if defined(PETSC_USE_COMPLEX) 154 #if defined(PETSC_USE_REAL_SINGLE) 155 CMUMPS_STRUC_C id; 156 #else 157 ZMUMPS_STRUC_C id; 158 #endif 159 #else 160 #if defined(PETSC_USE_REAL_SINGLE) 161 SMUMPS_STRUC_C id; 162 #else 163 DMUMPS_STRUC_C id; 164 #endif 165 #endif 166 167 MatStructure matstruc; 168 PetscMPIInt myid, petsc_size; 169 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. 
*/ 170 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 171 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 172 PetscMUMPSInt sym; 173 MPI_Comm mumps_comm; 174 PetscMUMPSInt *ICNTL_pre; 175 PetscReal *CNTL_pre; 176 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 177 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 178 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 179 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 180 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 181 PetscInt *rhs_nrow, max_nrhs; 182 PetscMPIInt *rhs_recvcounts, *rhs_disps; 183 PetscScalar *rhs_loc, *rhs_recvbuf; 184 #endif 185 Vec b_seq, x_seq; 186 PetscInt ninfo, *info; /* which INFO to display */ 187 PetscInt sizeredrhs; 188 PetscScalar *schur_sol; 189 PetscInt schur_sizesol; 190 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 191 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 192 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 193 194 /* Support for MATNEST */ 195 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 196 PetscInt64 *nest_vals_start; 197 PetscScalar *nest_vals; 198 199 /* stuff used by petsc/mumps OpenMP support*/ 200 PetscBool use_petsc_omp_support; 201 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 202 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 203 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 204 PetscMPIInt tag, omp_comm_size; 205 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 206 MPI_Request *reqs; 207 }; 208 209 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt 
   to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
   On 64-bit-indices builds the arrays are copied (with range checks) into mumps->ia_alloc/ja_alloc,
   which are grown on demand and reused across calls; otherwise the input arrays are aliased directly.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  /* 32-bit PetscInt is layout-compatible with PetscMUMPSInt, so alias without copying */
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Free the Schur-complement related arrays held in the MUMPS id struct and disable Schur mode (ICNTL(19)=0) */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  /* B wraps id.redrhs (no copy): it is both the input rhs and, for the factored path, the output location */
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X aliases the same redrhs buffer, so the triangular solve writes the result in place */
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds inv(Schur); the solve is a dense mat-mat product into a separate buffer, then copied back to redrhs */
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drive the two halves of a MUMPS solve with a Schur complement:
   expansion == PETSC_FALSE: before the condensation step — make sure id.redrhs is large enough for nrhs reduced rhs.
   expansion == PETSC_TRUE : after condensation — solve the Schur system ourselves (MatMumpsSolveSchur_Private),
                             then run the MUMPS expansion phase (ICNTL(26)=2) and restore ICNTL(26)=-1. */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]

input: 356 A - matrix in aij,baij or sbaij format 357 shift - 0: C style output triple; 1: Fortran style output triple. 358 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 359 MAT_REUSE_MATRIX: only the values in v array are updated 360 output: 361 nnz - dim of r, c, and v (number of local nonzero entries of A) 362 r, c, v - row and col index, matrix values (matrix triples) 363 364 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 365 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 366 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 367 368 */ 369 370 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 371 { 372 const PetscScalar *av; 373 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 374 PetscInt64 nz, rnz, i, j, k; 375 PetscMUMPSInt *row, *col; 376 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 377 378 PetscFunctionBegin; 379 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 380 if (reuse == MAT_INITIAL_MATRIX) { 381 nz = aa->nz; 382 ai = aa->i; 383 aj = aa->j; 384 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 385 for (i = k = 0; i < M; i++) { 386 rnz = ai[i + 1] - ai[i]; 387 ajj = aj + ai[i]; 388 for (j = 0; j < rnz; j++) { 389 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 390 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 391 k++; 392 } 393 } 394 mumps->val = (PetscScalar *)av; 395 mumps->irn = row; 396 mumps->jcn = col; 397 mumps->nnz = nz; 398 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */ 399 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 400 
PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 401 PetscFunctionReturn(PETSC_SUCCESS); 402 } 403 404 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 405 { 406 PetscInt64 nz, i, j, k, r; 407 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 408 PetscMUMPSInt *row, *col; 409 410 PetscFunctionBegin; 411 nz = a->sliidx[a->totalslices]; 412 if (reuse == MAT_INITIAL_MATRIX) { 413 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 414 for (i = k = 0; i < a->totalslices; i++) { 415 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 416 } 417 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 418 mumps->irn = row; 419 mumps->jcn = col; 420 mumps->nnz = nz; 421 mumps->val = a->val; 422 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */ 423 else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 424 PetscFunctionReturn(PETSC_SUCCESS); 425 } 426 427 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 428 { 429 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 430 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 431 PetscInt64 M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 432 PetscInt bs; 433 PetscMUMPSInt *row, *col; 434 435 PetscFunctionBegin; 436 if (reuse == MAT_INITIAL_MATRIX) { 437 PetscCall(MatGetBlockSize(A, &bs)); 438 M = A->rmap->N / bs; 439 ai = aa->i; 440 aj = aa->j; 441 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 442 for (i = 0; i < M; i++) { 443 ajj = aj + ai[i]; 444 rnz = ai[i + 1] - ai[i]; 445 for (k = 0; k < rnz; k++) { 446 for (j = 0; j < bs; j++) { 447 for (m = 0; m < bs; m++) { 448 
PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 449 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 450 idx++; 451 } 452 } 453 } 454 } 455 mumps->irn = row; 456 mumps->jcn = col; 457 mumps->nnz = nz; 458 mumps->val = aa->a; 459 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 460 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 461 PetscFunctionReturn(PETSC_SUCCESS); 462 } 463 464 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 465 { 466 const PetscInt *ai, *aj, *ajj; 467 PetscInt bs; 468 PetscInt64 nz, rnz, i, j, k, m; 469 PetscMUMPSInt *row, *col; 470 PetscScalar *val; 471 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 472 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 473 #if defined(PETSC_USE_COMPLEX) 474 PetscBool isset, hermitian; 475 #endif 476 477 PetscFunctionBegin; 478 #if defined(PETSC_USE_COMPLEX) 479 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 480 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 481 #endif 482 ai = aa->i; 483 aj = aa->j; 484 PetscCall(MatGetBlockSize(A, &bs)); 485 if (reuse == MAT_INITIAL_MATRIX) { 486 const PetscInt64 alloc_size = aa->nz * bs2; 487 488 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 489 if (bs > 1) { 490 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 491 mumps->val = mumps->val_alloc; 492 } else { 493 mumps->val = aa->a; 494 } 495 mumps->irn = row; 496 mumps->jcn = col; 497 } else { 498 row = mumps->irn; 499 col = mumps->jcn; 500 } 501 val = mumps->val; 502 503 nz = 0; 504 if (bs > 1) { 505 for (i = 0; i < mbs; i++) { 506 rnz = ai[i + 1] - ai[i]; 
507 ajj = aj + ai[i]; 508 for (j = 0; j < rnz; j++) { 509 for (k = 0; k < bs; k++) { 510 for (m = 0; m < bs; m++) { 511 if (ajj[j] > i || k >= m) { 512 if (reuse == MAT_INITIAL_MATRIX) { 513 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 514 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 515 } 516 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 517 } 518 } 519 } 520 } 521 } 522 } else if (reuse == MAT_INITIAL_MATRIX) { 523 for (i = 0; i < mbs; i++) { 524 rnz = ai[i + 1] - ai[i]; 525 ajj = aj + ai[i]; 526 for (j = 0; j < rnz; j++) { 527 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 528 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 529 nz++; 530 } 531 } 532 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 533 } else if (mumps->nest_vals) 534 PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 535 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 536 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 537 PetscFunctionReturn(PETSC_SUCCESS); 538 } 539 540 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 541 { 542 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 543 PetscInt64 nz, rnz, i, j; 544 const PetscScalar *av, *v1; 545 PetscScalar *val; 546 PetscMUMPSInt *row, *col; 547 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 548 PetscBool missing; 549 #if defined(PETSC_USE_COMPLEX) 550 PetscBool hermitian, isset; 551 #endif 552 553 PetscFunctionBegin; 554 #if defined(PETSC_USE_COMPLEX) 555 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 556 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), 
PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 557 #endif 558 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 559 ai = aa->i; 560 aj = aa->j; 561 adiag = aa->diag; 562 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 563 if (reuse == MAT_INITIAL_MATRIX) { 564 /* count nz in the upper triangular part of A */ 565 nz = 0; 566 if (missing) { 567 for (i = 0; i < M; i++) { 568 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 569 for (j = ai[i]; j < ai[i + 1]; j++) { 570 if (aj[j] < i) continue; 571 nz++; 572 } 573 } else { 574 nz += ai[i + 1] - adiag[i]; 575 } 576 } 577 } else { 578 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 579 } 580 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 581 PetscCall(PetscMalloc1(nz, &val)); 582 mumps->nnz = nz; 583 mumps->irn = row; 584 mumps->jcn = col; 585 mumps->val = mumps->val_alloc = val; 586 587 nz = 0; 588 if (missing) { 589 for (i = 0; i < M; i++) { 590 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 591 for (j = ai[i]; j < ai[i + 1]; j++) { 592 if (aj[j] < i) continue; 593 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 594 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 595 val[nz] = av[j]; 596 nz++; 597 } 598 } else { 599 rnz = ai[i + 1] - adiag[i]; 600 ajj = aj + adiag[i]; 601 v1 = av + adiag[i]; 602 for (j = 0; j < rnz; j++) { 603 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 604 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 605 val[nz++] = v1[j]; 606 } 607 } 608 } 609 } else { 610 for (i = 0; i < M; i++) { 611 rnz = ai[i + 1] - adiag[i]; 612 ajj = aj + adiag[i]; 613 v1 = av + adiag[i]; 614 for (j = 0; j < rnz; j++) { 615 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 616 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 617 val[nz++] = v1[j]; 618 } 619 } 620 } 621 } else { 622 nz = 0; 623 val = mumps->val; 624 if (missing) { 625 for (i = 0; i < M; i++) { 626 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 627 for (j = ai[i]; j < ai[i + 1]; j++) { 628 if (aj[j] 
< i) continue; 629 val[nz++] = av[j]; 630 } 631 } else { 632 rnz = ai[i + 1] - adiag[i]; 633 v1 = av + adiag[i]; 634 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 635 } 636 } 637 } else { 638 for (i = 0; i < M; i++) { 639 rnz = ai[i + 1] - adiag[i]; 640 v1 = av + adiag[i]; 641 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 642 } 643 } 644 } 645 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 646 PetscFunctionReturn(PETSC_SUCCESS); 647 } 648 649 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 650 { 651 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 652 PetscInt bs; 653 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 654 PetscMUMPSInt *row, *col; 655 const PetscScalar *av, *bv, *v1, *v2; 656 PetscScalar *val; 657 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 658 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data; 659 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 660 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 661 #if defined(PETSC_USE_COMPLEX) 662 PetscBool hermitian, isset; 663 #endif 664 665 PetscFunctionBegin; 666 #if defined(PETSC_USE_COMPLEX) 667 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 668 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 669 #endif 670 PetscCall(MatGetBlockSize(A, &bs)); 671 rstart = A->rmap->rstart; 672 ai = aa->i; 673 aj = aa->j; 674 bi = bb->i; 675 bj = bb->j; 676 av = aa->a; 677 bv = bb->a; 678 679 garray = mat->garray; 680 681 if (reuse == MAT_INITIAL_MATRIX) { 682 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 683 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 684 PetscCall(PetscMalloc1(nz, &val)); 685 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 686 mumps->irn = row; 687 mumps->jcn = col; 688 mumps->val = mumps->val_alloc = val; 689 } else { 690 val = mumps->val; 691 } 692 693 jj = 0; 694 irow = rstart; 
695 for (i = 0; i < mbs; i++) { 696 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 697 countA = ai[i + 1] - ai[i]; 698 countB = bi[i + 1] - bi[i]; 699 bjj = bj + bi[i]; 700 v1 = av + ai[i] * bs2; 701 v2 = bv + bi[i] * bs2; 702 703 if (bs > 1) { 704 /* A-part */ 705 for (j = 0; j < countA; j++) { 706 for (k = 0; k < bs; k++) { 707 for (m = 0; m < bs; m++) { 708 if (rstart + ajj[j] * bs > irow || k >= m) { 709 if (reuse == MAT_INITIAL_MATRIX) { 710 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 711 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 712 } 713 val[jj++] = v1[j * bs2 + m + k * bs]; 714 } 715 } 716 } 717 } 718 719 /* B-part */ 720 for (j = 0; j < countB; j++) { 721 for (k = 0; k < bs; k++) { 722 for (m = 0; m < bs; m++) { 723 if (reuse == MAT_INITIAL_MATRIX) { 724 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 725 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 726 } 727 val[jj++] = v2[j * bs2 + m + k * bs]; 728 } 729 } 730 } 731 } else { 732 /* A-part */ 733 for (j = 0; j < countA; j++) { 734 if (reuse == MAT_INITIAL_MATRIX) { 735 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 736 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 737 } 738 val[jj++] = v1[j]; 739 } 740 741 /* B-part */ 742 for (j = 0; j < countB; j++) { 743 if (reuse == MAT_INITIAL_MATRIX) { 744 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 745 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 746 } 747 val[jj++] = v2[j]; 748 } 749 } 750 irow += bs; 751 } 752 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 753 PetscFunctionReturn(PETSC_SUCCESS); 754 } 755 756 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 757 { 758 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 759 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 760 PetscMUMPSInt *row, *col; 761 const 
PetscScalar *av, *bv, *v1, *v2; 762 PetscScalar *val; 763 Mat Ad, Ao; 764 Mat_SeqAIJ *aa; 765 Mat_SeqAIJ *bb; 766 767 PetscFunctionBegin; 768 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 769 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 770 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 771 772 aa = (Mat_SeqAIJ *)(Ad)->data; 773 bb = (Mat_SeqAIJ *)(Ao)->data; 774 ai = aa->i; 775 aj = aa->j; 776 bi = bb->i; 777 bj = bb->j; 778 779 rstart = A->rmap->rstart; 780 cstart = A->cmap->rstart; 781 782 if (reuse == MAT_INITIAL_MATRIX) { 783 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 784 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 785 PetscCall(PetscMalloc1(nz, &val)); 786 mumps->nnz = nz; 787 mumps->irn = row; 788 mumps->jcn = col; 789 mumps->val = mumps->val_alloc = val; 790 } else { 791 val = mumps->val; 792 } 793 794 jj = 0; 795 irow = rstart; 796 for (i = 0; i < m; i++) { 797 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 798 countA = ai[i + 1] - ai[i]; 799 countB = bi[i + 1] - bi[i]; 800 bjj = bj + bi[i]; 801 v1 = av + ai[i]; 802 v2 = bv + bi[i]; 803 804 /* A-part */ 805 for (j = 0; j < countA; j++) { 806 if (reuse == MAT_INITIAL_MATRIX) { 807 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 808 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 809 } 810 val[jj++] = v1[j]; 811 } 812 813 /* B-part */ 814 for (j = 0; j < countB; j++) { 815 if (reuse == MAT_INITIAL_MATRIX) { 816 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 817 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 818 } 819 val[jj++] = v2[j]; 820 } 821 irow++; 822 } 823 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 824 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 825 PetscFunctionReturn(PETSC_SUCCESS); 826 } 827 828 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 829 { 830 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 831 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ 
*)mat->A->data; 832 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 833 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 834 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 835 const PetscInt bs2 = mat->bs2; 836 PetscInt bs; 837 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 838 PetscMUMPSInt *row, *col; 839 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 840 PetscScalar *val; 841 842 PetscFunctionBegin; 843 PetscCall(MatGetBlockSize(A, &bs)); 844 if (reuse == MAT_INITIAL_MATRIX) { 845 nz = bs2 * (aa->nz + bb->nz); 846 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 847 PetscCall(PetscMalloc1(nz, &val)); 848 mumps->nnz = nz; 849 mumps->irn = row; 850 mumps->jcn = col; 851 mumps->val = mumps->val_alloc = val; 852 } else { 853 val = mumps->val; 854 } 855 856 jj = 0; 857 irow = rstart; 858 for (i = 0; i < mbs; i++) { 859 countA = ai[i + 1] - ai[i]; 860 countB = bi[i + 1] - bi[i]; 861 ajj = aj + ai[i]; 862 bjj = bj + bi[i]; 863 v1 = av + bs2 * ai[i]; 864 v2 = bv + bs2 * bi[i]; 865 866 idx = 0; 867 /* A-part */ 868 for (k = 0; k < countA; k++) { 869 for (j = 0; j < bs; j++) { 870 for (n = 0; n < bs; n++) { 871 if (reuse == MAT_INITIAL_MATRIX) { 872 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 873 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 874 } 875 val[jj++] = v1[idx++]; 876 } 877 } 878 } 879 880 idx = 0; 881 /* B-part */ 882 for (k = 0; k < countB; k++) { 883 for (j = 0; j < bs; j++) { 884 for (n = 0; n < bs; n++) { 885 if (reuse == MAT_INITIAL_MATRIX) { 886 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 887 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 888 } 889 val[jj++] = v2[idx++]; 890 } 891 } 892 } 893 irow += bs; 894 } 895 PetscFunctionReturn(PETSC_SUCCESS); 896 } 897 898 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS 
*mumps) 899 { 900 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 901 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 902 PetscMUMPSInt *row, *col; 903 const PetscScalar *av, *bv, *v1, *v2; 904 PetscScalar *val; 905 Mat Ad, Ao; 906 Mat_SeqAIJ *aa; 907 Mat_SeqAIJ *bb; 908 #if defined(PETSC_USE_COMPLEX) 909 PetscBool hermitian, isset; 910 #endif 911 912 PetscFunctionBegin; 913 #if defined(PETSC_USE_COMPLEX) 914 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 915 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 916 #endif 917 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 918 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 919 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 920 921 aa = (Mat_SeqAIJ *)(Ad)->data; 922 bb = (Mat_SeqAIJ *)(Ao)->data; 923 ai = aa->i; 924 aj = aa->j; 925 adiag = aa->diag; 926 bi = bb->i; 927 bj = bb->j; 928 929 rstart = A->rmap->rstart; 930 931 if (reuse == MAT_INITIAL_MATRIX) { 932 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 933 nzb = 0; /* num of upper triangular entries in mat->B */ 934 for (i = 0; i < m; i++) { 935 nza += (ai[i + 1] - adiag[i]); 936 countB = bi[i + 1] - bi[i]; 937 bjj = bj + bi[i]; 938 for (j = 0; j < countB; j++) { 939 if (garray[bjj[j]] > rstart) nzb++; 940 } 941 } 942 943 nz = nza + nzb; /* total nz of upper triangular part of mat */ 944 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 945 PetscCall(PetscMalloc1(nz, &val)); 946 mumps->nnz = nz; 947 mumps->irn = row; 948 mumps->jcn = col; 949 mumps->val = mumps->val_alloc = val; 950 } else { 951 val = mumps->val; 952 } 953 954 jj = 0; 955 irow = rstart; 956 for (i = 0; i < m; i++) { 957 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 958 v1 = av + adiag[i]; 959 countA = ai[i + 1] - adiag[i]; 960 countB = bi[i + 1] - bi[i]; 961 bjj = bj + bi[i]; 962 v2 = bv + 
bi[i]; 963 964 /* A-part */ 965 for (j = 0; j < countA; j++) { 966 if (reuse == MAT_INITIAL_MATRIX) { 967 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 968 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 969 } 970 val[jj++] = v1[j]; 971 } 972 973 /* B-part */ 974 for (j = 0; j < countB; j++) { 975 if (garray[bjj[j]] > rstart) { 976 if (reuse == MAT_INITIAL_MATRIX) { 977 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 978 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 979 } 980 val[jj++] = v2[j]; 981 } 982 } 983 irow++; 984 } 985 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 986 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 987 PetscFunctionReturn(PETSC_SUCCESS); 988 } 989 990 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 991 { 992 const PetscScalar *av; 993 const PetscInt M = A->rmap->n; 994 PetscInt64 i; 995 PetscMUMPSInt *row, *col; 996 Vec v; 997 998 PetscFunctionBegin; 999 PetscCall(MatDiagonalGetDiagonal(A, &v)); 1000 PetscCall(VecGetArrayRead(v, &av)); 1001 if (reuse == MAT_INITIAL_MATRIX) { 1002 PetscCall(PetscMalloc2(M, &row, M, &col)); 1003 for (i = 0; i < M; i++) { 1004 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1005 col[i] = row[i]; 1006 } 1007 mumps->val = (PetscScalar *)av; 1008 mumps->irn = row; 1009 mumps->jcn = col; 1010 mumps->nnz = M; 1011 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1012 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1013 PetscCall(VecRestoreArrayRead(v, &av)); 1014 PetscFunctionReturn(PETSC_SUCCESS); 1015 } 1016 1017 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, 
Mat_MUMPS *mumps) 1018 { 1019 PetscScalar *v; 1020 const PetscInt m = A->rmap->n, N = A->cmap->N; 1021 PetscInt lda; 1022 PetscInt64 i, j; 1023 PetscMUMPSInt *row, *col; 1024 1025 PetscFunctionBegin; 1026 PetscCall(MatDenseGetArray(A, &v)); 1027 PetscCall(MatDenseGetLDA(A, &lda)); 1028 if (reuse == MAT_INITIAL_MATRIX) { 1029 PetscCall(PetscMalloc2(m * N, &row, m * N, &col)); 1030 for (i = 0; i < m; i++) { 1031 col[i] = 0; 1032 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1033 } 1034 for (j = 1; j < N; j++) { 1035 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j)); 1036 PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m)); 1037 } 1038 if (lda == m) mumps->val = v; 1039 else { 1040 PetscCall(PetscMalloc1(m * N, &mumps->val)); 1041 mumps->val_alloc = mumps->val; 1042 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1043 } 1044 mumps->irn = row; 1045 mumps->jcn = col; 1046 mumps->nnz = m * N; 1047 } else { 1048 if (lda == m && !mumps->nest_vals) mumps->val = v; 1049 else { 1050 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1051 } 1052 } 1053 PetscCall(MatDenseRestoreArray(A, &v)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1058 { 1059 Mat **mats; 1060 PetscInt nr, nc; 1061 PetscBool chol = mumps->sym ? 
PETSC_TRUE : PETSC_FALSE; 1062 1063 PetscFunctionBegin; 1064 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1065 if (reuse == MAT_INITIAL_MATRIX) { 1066 PetscMUMPSInt *irns, *jcns; 1067 PetscScalar *vals; 1068 PetscInt64 totnnz, cumnnz, maxnnz; 1069 PetscInt *pjcns_w; 1070 IS *rows, *cols; 1071 PetscInt **rows_idx, **cols_idx; 1072 1073 cumnnz = 0; 1074 maxnnz = 0; 1075 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1076 for (PetscInt r = 0; r < nr; r++) { 1077 for (PetscInt c = 0; c < nc; c++) { 1078 Mat sub = mats[r][c]; 1079 1080 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1081 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1082 if (sub) { 1083 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1084 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag, isDense; 1085 MatInfo info; 1086 1087 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1088 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1089 else { 1090 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1091 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1092 } 1093 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1094 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1095 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1096 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1097 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1098 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1099 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1100 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, 
MATSEQDENSE, MATMPIDENSE, NULL)); 1101 1102 if (chol) { 1103 if (r == c) { 1104 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1105 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1106 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1107 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1108 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1109 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1110 } else { 1111 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1112 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1113 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1114 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1115 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1116 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1117 } 1118 } else { 1119 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1120 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1121 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1122 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1123 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1124 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1125 } 1126 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1127 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1128 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1129 cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 1130 maxnnz = PetscMax(maxnnz, info.nz_used); 1131 } 1132 } 1133 } 1134 1135 /* Allocate total COO */ 
totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    /* with 64-bit PetscInt a separate PetscInt work array is needed since jcn is 32-bit PetscMUMPSInt */
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    /* Second pass: convert each sub-block and remap its triples into the global COO arrays */
    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub  = mats[r][c];
        const PetscInt *ridx = rows_idx[r];
        const PetscInt *cidx = cols_idx[c];
        PetscInt        rst;
        PetscSF         csf;
        PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
        PetscLayout     cmap;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        swap = (PetscBool)(isTrans || isHTrans);

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
        /* broadcast replaces each local column index by the IS-mapped global column it refers to */
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
        if (isHTrans) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]);
        }

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    /* for Cholesky the count is an overestimate, so equality is only checked otherwise */
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    /* MAT_REUSE_MATRIX: refresh each sub-block's values in place within nest_vals */
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool isTrans, isHTrans = PETSC_FALSE;
        Mat       sub  = mats[r][c];
        PetscInt  midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (isHTrans) {
          PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
        }
      }
    }
    mumps->val = oval;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the MUMPS factor matrix: terminate the MUMPS instance (JOB_END), release all
   scratch buffers and communicators, and clear composed functions. */
static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      /* the communicator was either duplicated for OpenMP support or borrowed from PETSc */
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
PetscCall(PetscFree(mumps->reqs)); 1310 PetscCall(PetscFree(mumps->irhs_loc)); 1311 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1312 PetscCall(PetscFree(mumps->nest_vals)); 1313 PetscCall(PetscFree(A->data)); 1314 1315 /* clear composed functions */ 1316 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1317 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1318 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1319 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1320 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1321 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1322 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1323 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1324 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1325 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1326 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1327 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1328 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1329 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1330 PetscFunctionReturn(PETSC_SUCCESS); 1331 } 1332 1333 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
*/ 1334 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1335 { 1336 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1337 const PetscMPIInt ompsize = mumps->omp_comm_size; 1338 PetscInt i, m, M, rstart; 1339 1340 PetscFunctionBegin; 1341 PetscCall(MatGetSize(A, &M, NULL)); 1342 PetscCall(MatGetLocalSize(A, &m, NULL)); 1343 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1344 if (ompsize == 1) { 1345 if (!mumps->irhs_loc) { 1346 mumps->nloc_rhs = m; 1347 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1348 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1349 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1350 } 1351 mumps->id.rhs_loc = (MumpsScalar *)array; 1352 } else { 1353 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1354 const PetscInt *ranges; 1355 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1356 MPI_Group petsc_group, omp_group; 1357 PetscScalar *recvbuf = NULL; 1358 1359 if (mumps->is_omp_master) { 1360 /* Lazily initialize the omp stuff for distributed rhs */ 1361 if (!mumps->irhs_loc) { 1362 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1363 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1364 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1365 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1366 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1367 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1368 1369 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1370 mumps->nloc_rhs = 0; 1371 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1372 for (j = 0; j < ompsize; j++) { 1373 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1374 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1375 } 1376 PetscCall(PetscMalloc1(mumps->nloc_rhs, 
&mumps->irhs_loc)); 1377 for (j = k = 0; j < ompsize; j++) { 1378 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1379 } 1380 1381 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1382 PetscCallMPI(MPI_Group_free(&petsc_group)); 1383 PetscCallMPI(MPI_Group_free(&omp_group)); 1384 } 1385 1386 /* Realloc buffers when current nrhs is bigger than what we have met */ 1387 if (nrhs > mumps->max_nrhs) { 1388 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1389 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1390 mumps->max_nrhs = nrhs; 1391 } 1392 1393 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1394 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1395 mumps->rhs_disps[0] = 0; 1396 for (j = 1; j < ompsize; j++) { 1397 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1398 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1399 } 1400 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. 
Single rhs is common in Ax=b */ 1401 } 1402 1403 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1404 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1405 1406 if (mumps->is_omp_master) { 1407 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1408 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1409 for (j = 0; j < ompsize; j++) { 1410 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1411 dst = dstbase; 1412 for (i = 0; i < nrhs; i++) { 1413 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1414 src += mumps->rhs_nrow[j]; 1415 dst += mumps->nloc_rhs; 1416 } 1417 dstbase += mumps->rhs_nrow[j]; 1418 } 1419 } 1420 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1421 } 1422 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1423 } 1424 mumps->id.nrhs = nrhs; 1425 mumps->id.nloc_rhs = mumps->nloc_rhs; 1426 mumps->id.lrhs_loc = mumps->nloc_rhs; 1427 mumps->id.irhs_loc = mumps->irhs_loc; 1428 PetscFunctionReturn(PETSC_SUCCESS); 1429 } 1430 1431 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1432 { 1433 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1434 const PetscScalar *rarray = NULL; 1435 PetscScalar *array; 1436 IS is_iden, is_petsc; 1437 PetscInt i; 1438 PetscBool second_solve = PETSC_FALSE; 1439 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1440 1441 PetscFunctionBegin; 1442 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1443 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1444 &cite1)); 1445 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. 
L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1446 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1447 &cite2)); 1448 1449 PetscCall(VecFlag(x, A->factorerrortype)); 1450 if (A->factorerrortype) { 1451 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1452 PetscFunctionReturn(PETSC_SUCCESS); 1453 } 1454 1455 mumps->id.nrhs = 1; 1456 if (mumps->petsc_size > 1) { 1457 if (mumps->ICNTL20 == 10) { 1458 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1459 PetscCall(VecGetArrayRead(b, &rarray)); 1460 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1461 } else { 1462 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1463 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1464 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1465 if (!mumps->myid) { 1466 PetscCall(VecGetArray(mumps->b_seq, &array)); 1467 mumps->id.rhs = (MumpsScalar *)array; 1468 } 1469 } 1470 } else { /* petsc_size == 1 */ 1471 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1472 PetscCall(VecCopy(b, x)); 1473 PetscCall(VecGetArray(x, &array)); 1474 mumps->id.rhs = (MumpsScalar *)array; 1475 } 1476 1477 /* 1478 handle condensation step of Schur complement (if any) 1479 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1480 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1481 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 
1482 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1483 */ 1484 if (mumps->id.size_schur > 0) { 1485 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1486 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1487 second_solve = PETSC_TRUE; 1488 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1489 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1490 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1491 } 1492 /* solve phase */ 1493 mumps->id.job = JOB_SOLVE; 1494 PetscMUMPS_c(mumps); 1495 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1496 1497 /* handle expansion step of Schur complement (if any) */ 1498 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1499 else if (mumps->id.ICNTL(26) == 1) { 1500 PetscCall(MatMumpsSolveSchur_Private(A)); 1501 for (i = 0; i < mumps->id.size_schur; ++i) { 1502 #if !defined(PETSC_USE_COMPLEX) 1503 PetscScalar val = mumps->id.redrhs[i]; 1504 #else 1505 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1506 #endif 1507 array[mumps->id.listvar_schur[i] - 1] = val; 1508 } 1509 } 1510 1511 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1512 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1513 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1514 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1515 } 1516 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1517 PetscInt *isol2_loc = NULL; 1518 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1519 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1520 for (i = 0; i < mumps->id.lsol_loc; i++) 
isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1521 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1522 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1523 PetscCall(ISDestroy(&is_iden)); 1524 PetscCall(ISDestroy(&is_petsc)); 1525 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1526 } 1527 1528 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1529 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1530 } 1531 1532 if (mumps->petsc_size > 1) { 1533 if (mumps->ICNTL20 == 10) { 1534 PetscCall(VecRestoreArrayRead(b, &rarray)); 1535 } else if (!mumps->myid) { 1536 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1537 } 1538 } else PetscCall(VecRestoreArray(x, &array)); 1539 1540 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1541 PetscFunctionReturn(PETSC_SUCCESS); 1542 } 1543 1544 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1545 { 1546 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1547 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1548 1549 PetscFunctionBegin; 1550 mumps->id.ICNTL(9) = 0; 1551 PetscCall(MatSolve_MUMPS(A, b, x)); 1552 mumps->id.ICNTL(9) = value; 1553 PetscFunctionReturn(PETSC_SUCCESS); 1554 } 1555 1556 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1557 { 1558 Mat Bt = NULL; 1559 PetscBool denseX, denseB, flg, flgT; 1560 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1561 PetscInt i, nrhs, M; 1562 PetscScalar *array; 1563 const PetscScalar *rbray; 1564 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1565 PetscMUMPSInt *isol_loc, *isol_loc_save; 1566 PetscScalar *bray, *sol_loc, *sol_loc_save; 1567 IS is_to, is_from; 1568 PetscInt k, proc, j, m, myrstart; 1569 const PetscInt *rstart; 1570 Vec v_mpi, msol_loc; 1571 VecScatter scat_sol; 1572 Vec b_seq; 1573 VecScatter scat_rhs; 1574 PetscScalar *aa; 1575 PetscInt spnr, *ia, *ja; 1576 Mat_MPIAIJ *b = NULL; 1577 1578 PetscFunctionBegin; 1579 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1580 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1581 1582 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1583 if (denseB) { 1584 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1585 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1586 } else { /* sparse B */ 1587 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1588 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1589 if (flgT) { /* input B is transpose of actual RHS matrix, 
                   because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      PetscCall(MatTransposeGetMat(B, &Bt));
    } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B, &M, &nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) { /* sequential case: solve in place, X doubles as the RHS buffer */
    PetscScalar *aa; /* NOTE: these shadow the outer aa/spnr/ia/ja declared for the parallel path */
    PetscInt     spnr, *ia, *ja;
    PetscBool    second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X, &array));
    mumps->id.rhs = (MumpsScalar *)array;

    if (denseB) {
      /* copy B to X */
      PetscCall(MatDenseGetArrayRead(B, &rbray));
      PetscCall(PetscArraycpy(array, rbray, M * nrhs));
      PetscCall(MatDenseRestoreArrayRead(B, &rbray));
    } else { /* sparse B: hand MUMPS the CSR of Bt, which it reads as compressed-column B */
      PetscCall(MatSeqAIJGetArray(Bt, &aa));
      PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar *)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0) {
      if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
        second_solve = PETSC_TRUE;
        PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
        mumps->id.ICNTL(26) = 1; /* condensation phase */
      } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
    }
    /* solve phase */
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
    else if (mumps->id.ICNTL(26) == 1) {
      PetscCall(MatMumpsSolveSchur_Private(A));
      /* copy the reduced (Schur) solution back into the full solution array X */
      for (j = 0; j < nrhs; ++j)
        for (i = 0; i < mumps->id.size_schur; ++i) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs];
#else
          PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i;
#endif
          array[mumps->id.listvar_schur[i] - 1 + j * M] = val; /* listvar_schur is 1-based (Fortran) */
        }
    }
    if (!denseB) { /* sparse B */
      PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
      PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
    PetscCall(MatDenseRestoreArray(X, &array));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* parallel case: MUMPS requires rhs B to be centralized on the host! */
  PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
  mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B, &rbray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
      PetscCall(MatDenseRestoreArrayRead(B, &rbray));
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
         0, re-arrange B into desired order, which is a local operation.
       */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(MatDenseGetArray(B, &bray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
      PetscCall(MatDenseRestoreArray(B, &bray));

      /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhs * M, &idx));
        PetscCall(MatGetOwnershipRanges(B, &rstart));
        k = 0;
        for (proc = 0; proc < mumps->petsc_size; proc++) {
          for (j = 0; j < nrhs; j++) {
            for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
      } else {
        /* non-root ranks contribute nothing to the centralized RHS */
        PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
      }
      PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

      if (!mumps->myid) { /* define rhs on the host */
        PetscCall(VecGetArray(b_seq, &bray));
        mumps->id.rhs = (MumpsScalar *)bray;
        PetscCall(VecRestoreArray(b_seq, &bray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ *)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X, &m, NULL));
    PetscCall(MatDenseGetArray(X, &bray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
    PetscCall(MatDenseRestoreArray(X, &bray));

    if (!mumps->myid) {
      /* only the host passes the sparse RHS (diagonal block of Bt) to MUMPS */
      PetscCall(MatSeqAIJGetArray(b->A, &aa));
      PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar *)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  PetscCall(MatDenseGetArray(X, &array));
  PetscCall(VecPlaceArray(v_mpi, array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X, &rstart));
  /* iidx: index for scatter mumps solution to petsc X */

  PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
  PetscCall(PetscMalloc1(nlsol_loc, &idxx));
  for (i = 0; i < lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* find the owner rank of global row isol_loc[i] by scanning the ownership ranges */
    for (proc = 0; proc < mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
  PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
  PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X, &array));

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  PetscCall(PetscFree2(sol_loc, isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ *)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
      PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) { /* b_seq/scat_rhs only exist in the centralized dense-RHS path */
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A^T X = B for multiple RHS by flipping ICNTL(9) around MatMatSolve_MUMPS(), mirroring MatSolveTranspose_MUMPS() */
static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
{
  Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
  const PetscMUMPSInt value = mumps->id.ICNTL(9);

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatMatSolve_MUMPS(A, B, X));
  mumps->id.ICNTL(9) = value;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A X = Bt^T where the caller supplies the transpose Bt of the sparse RHS;
   wraps Bt in a MATTRANSPOSEVIRTUAL so MatMatSolve_MUMPS() can feed MUMPS
   compressed-column storage without an explicit transpose. */
static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
{
  PetscBool flg;
  Mat       B;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  PetscCall(MatCreateTranspose(Bt, &B));

  PetscCall(MatMatSolve_MUMPS(A, B, X));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:       numeric factor
  output:
   nneg:    total number of negative pivots
   nzero:   total number of zero pivots
   npos:    (global dimension of F) - nneg - nzero
*/
static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
{
  Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1861 1862 if (nneg) *nneg = mumps->id.INFOG(12); 1863 if (nzero || npos) { 1864 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1865 if (nzero) *nzero = mumps->id.INFOG(28); 1866 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1867 } 1868 PetscFunctionReturn(PETSC_SUCCESS); 1869 } 1870 #endif 1871 1872 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1873 { 1874 PetscInt i, nreqs; 1875 PetscMUMPSInt *irn, *jcn; 1876 PetscMPIInt count; 1877 PetscInt64 totnnz, remain; 1878 const PetscInt osize = mumps->omp_comm_size; 1879 PetscScalar *val; 1880 1881 PetscFunctionBegin; 1882 if (osize > 1) { 1883 if (reuse == MAT_INITIAL_MATRIX) { 1884 /* master first gathers counts of nonzeros to receive */ 1885 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1886 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1887 1888 /* Then each computes number of send/recvs */ 1889 if (mumps->is_omp_master) { 1890 /* Start from 1 since self communication is not done in MPI */ 1891 nreqs = 0; 1892 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1893 } else { 1894 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1895 } 1896 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1897 1898 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1899 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1900 might be a prime number > 2^31-1, we have to slice the message. 
         Note omp_comm_size is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
        PetscCall(PetscMalloc1(totnnz, &val));

        /* Self communication */
        PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
        PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
        PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication: receive each rank's triples in <= PETSC_MPI_INT_MAX slices */
        for (i = 1; i < osize; i++) {
          count  = PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            irn += count;
            jcn += count;
            val += count;
            count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        /* non-master ranks send their triples, sliced the same way */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          irn += count;
          jcn += count;
          val += count;
          count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else { /* MAT_REUSE_MATRIX: sparsity pattern unchanged, only values need re-gathering */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
          count  = PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            val += count;
            count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          val += count;
          count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric factorization (JOB_FACTNUMERIC): refresh the triples from A, gather them on the
   OMP master if needed, call MUMPS, translate MUMPS error codes into F->factorerrortype,
   and (re)allocate the distributed-solution workspace for the parallel case. */
static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
  PetscBool  isMPIAIJ;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && 
mumps->id.INFOG(1) == 0)) { 1997 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1998 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1999 PetscFunctionReturn(PETSC_SUCCESS); 2000 } 2001 2002 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 2003 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2004 2005 /* numerical factorization phase */ 2006 mumps->id.job = JOB_FACTNUMERIC; 2007 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2008 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 2009 } else { 2010 mumps->id.a_loc = (MumpsScalar *)mumps->val; 2011 } 2012 PetscMUMPS_c(mumps); 2013 if (mumps->id.INFOG(1) < 0) { 2014 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2015 if (mumps->id.INFOG(1) == -10) { 2016 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2017 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2018 } else if (mumps->id.INFOG(1) == -13) { 2019 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2020 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2021 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2022 PetscCall(PetscInfo(F, "MUMPS error in numerical factorizatione: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2023 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2024 } else { 2025 
PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2026 F->factorerrortype = MAT_FACTOR_OTHER; 2027 } 2028 } 2029 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 2030 2031 F->assembled = PETSC_TRUE; 2032 2033 if (F->schur) { /* reset Schur status to unfactored */ 2034 #if defined(PETSC_HAVE_CUDA) 2035 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 2036 #endif 2037 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2038 mumps->id.ICNTL(19) = 2; 2039 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2040 } 2041 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2042 } 2043 2044 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2045 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2046 2047 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2048 if (mumps->petsc_size > 1) { 2049 PetscInt lsol_loc; 2050 PetscScalar *sol_loc; 2051 2052 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 2053 2054 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2055 if (mumps->x_seq) { 2056 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2057 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2058 PetscCall(VecDestroy(&mumps->x_seq)); 2059 } 2060 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2061 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2062 mumps->id.lsol_loc = lsol_loc; 2063 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2064 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2065 } 2066 PetscCall(PetscLogFlops((double)mumps->id.RINFO(2))); 2067 PetscFunctionReturn(PETSC_SUCCESS); 2068 } 2069 2070 /* Sets MUMPS 
options from the options database */ 2071 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2072 { 2073 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2074 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2075 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2076 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2077 MumpsScalar *arr; 2078 2079 PetscFunctionBegin; 2080 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2081 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2082 PetscInt nthreads = 0; 2083 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2084 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2085 2086 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2087 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2088 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2089 2090 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2091 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2092 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2093 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2094 if (mumps->use_petsc_omp_support) { 2095 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2096 ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 2097 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2098 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2099 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2100 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2101 #endif 2102 } else { 2103 mumps->omp_comm = PETSC_COMM_SELF; 2104 mumps->mumps_comm = mumps->petsc_comm; 2105 mumps->is_omp_master = PETSC_TRUE; 2106 } 2107 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2108 mumps->reqs = NULL; 2109 mumps->tag = 0; 2110 2111 if (mumps->mumps_comm != MPI_COMM_NULL) { 2112 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2113 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2114 MPI_Comm comm; 2115 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2116 mumps->mumps_comm = comm; 2117 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2118 } 2119 2120 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2121 mumps->id.job = JOB_INIT; 2122 mumps->id.par = 1; /* host participates factorizaton and solve */ 2123 mumps->id.sym = mumps->sym; 2124 2125 size = mumps->id.size_schur; 2126 arr = mumps->id.schur; 2127 listvar_schur = mumps->id.listvar_schur; 2128 PetscMUMPS_c(mumps); 2129 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2130 2131 /* set PETSc-MUMPS default options - override MUMPS default */ 2132 mumps->id.ICNTL(3) = 0; 2133 mumps->id.ICNTL(4) = 0; 2134 if (mumps->petsc_size == 1) { 2135 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2136 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2137 } else 
{ 2138 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2139 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2140 } 2141 2142 /* restore cached ICNTL and CNTL values */ 2143 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2144 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2145 PetscCall(PetscFree(mumps->ICNTL_pre)); 2146 PetscCall(PetscFree(mumps->CNTL_pre)); 2147 2148 if (schur) { 2149 mumps->id.size_schur = size; 2150 mumps->id.schur_lld = size; 2151 mumps->id.schur = arr; 2152 mumps->id.listvar_schur = listvar_schur; 2153 if (mumps->petsc_size > 1) { 2154 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2155 2156 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2157 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2158 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2159 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2160 } else { 2161 if (F->factortype == MAT_FACTOR_LU) { 2162 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2163 } else { 2164 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2165 } 2166 } 2167 mumps->id.ICNTL(26) = -1; 2168 } 2169 2170 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2171 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
2172 */ 2173 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2174 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2175 2176 mumps->scat_rhs = NULL; 2177 mumps->scat_sol = NULL; 2178 } 2179 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2180 if (flg) mumps->id.ICNTL(1) = icntl; 2181 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2182 if (flg) mumps->id.ICNTL(2) = icntl; 2183 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2184 if (flg) mumps->id.ICNTL(3) = icntl; 2185 2186 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2187 if (flg) mumps->id.ICNTL(4) = icntl; 2188 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2189 2190 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2191 if (flg) mumps->id.ICNTL(6) = icntl; 2192 2193 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2194 if (flg) { 2195 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2196 mumps->id.ICNTL(7) = icntl; 2197 } 2198 2199 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2200 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2201 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2202 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2203 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2204 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2205 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2206 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2207 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2208 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2209 if (flg) { 2210 PetscCheck(mumps->id.ICNTL(15) <= 0, 
PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2211 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2212 } 2213 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2214 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2215 PetscCall(MatDestroy(&F->schur)); 2216 PetscCall(MatMumpsResetSchur_Private(mumps)); 2217 } 2218 2219 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2220 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2221 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2222 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2223 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 
2224 In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 2225 */ 2226 #if PETSC_PKG_MUMPS_VERSION_GE(5, 6, 2) && defined(PETSC_HAVE_MUMPS_AVOID_MPI_IN_PLACE) 2227 mumps->ICNTL20 = 10; 2228 #elif PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH) && (MPICH_NUMVERSION < 40000101)) 2229 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 2230 #else 2231 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 2232 #endif 2233 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2234 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2235 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2236 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2237 #endif 2238 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2239 2240 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2241 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2242 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2243 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null 
pivots */ } 2244 2245 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2246 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2247 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2248 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2249 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2250 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2251 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2252 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2253 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2254 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2255 
PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2256 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2257 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL)); 2258 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2259 2260 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2261 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2262 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2263 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2264 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2265 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2266 2267 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2268 2269 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2270 if (ninfo) { 2271 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" 
PetscInt_FMT " must <= 80", ninfo); 2272 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2273 mumps->ninfo = ninfo; 2274 for (i = 0; i < ninfo; i++) { 2275 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2276 mumps->info[i] = info[i]; 2277 } 2278 } 2279 PetscOptionsEnd(); 2280 PetscFunctionReturn(PETSC_SUCCESS); 2281 } 2282 2283 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2284 { 2285 PetscFunctionBegin; 2286 if (mumps->id.INFOG(1) < 0) { 2287 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2288 if (mumps->id.INFOG(1) == -6) { 2289 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2290 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2291 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2292 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2293 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2294 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2295 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2296 } else { 2297 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2298 F->factorerrortype = MAT_FACTOR_OTHER; 2299 } 2300 } 2301 if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 2302 PetscFunctionReturn(PETSC_SUCCESS); 2303 } 2304 2305 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2306 { 2307 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2308 Vec b; 2309 const PetscInt M = A->rmap->N; 2310 2311 PetscFunctionBegin; 2312 
if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2313 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2314 PetscFunctionReturn(PETSC_SUCCESS); 2315 } 2316 2317 /* Set MUMPS options from the options database */ 2318 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2319 2320 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2321 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2322 2323 /* analysis phase */ 2324 mumps->id.job = JOB_FACTSYMBOLIC; 2325 mumps->id.n = M; 2326 switch (mumps->id.ICNTL(18)) { 2327 case 0: /* centralized assembled matrix input */ 2328 if (!mumps->myid) { 2329 mumps->id.nnz = mumps->nnz; 2330 mumps->id.irn = mumps->irn; 2331 mumps->id.jcn = mumps->jcn; 2332 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2333 if (r && mumps->id.ICNTL(7) == 7) { 2334 mumps->id.ICNTL(7) = 1; 2335 if (!mumps->myid) { 2336 const PetscInt *idx; 2337 PetscInt i; 2338 2339 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2340 PetscCall(ISGetIndices(r, &idx)); 2341 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */ 2342 PetscCall(ISRestoreIndices(r, &idx)); 2343 } 2344 } 2345 } 2346 break; 2347 case 3: /* distributed assembled matrix input (size>1) */ 2348 mumps->id.nnz_loc = mumps->nnz; 2349 mumps->id.irn_loc = mumps->irn; 2350 mumps->id.jcn_loc = mumps->jcn; 2351 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2352 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2353 PetscCall(MatCreateVecs(A, NULL, &b)); 2354 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2355 PetscCall(VecDestroy(&b)); 2356 } 2357 break; 2358 } 2359 PetscMUMPS_c(mumps); 2360 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2361 2362 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2363 F->ops->solve = MatSolve_MUMPS; 2364 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2365 F->ops->matsolve = MatMatSolve_MUMPS; 2366 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2367 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2368 2369 mumps->matstruc = SAME_NONZERO_PATTERN; 2370 PetscFunctionReturn(PETSC_SUCCESS); 2371 } 2372 2373 /* Note the Petsc r and c permutations are ignored */ 2374 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2375 { 2376 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2377 Vec b; 2378 const PetscInt M = A->rmap->N; 2379 2380 PetscFunctionBegin; 2381 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2382 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2383 PetscFunctionReturn(PETSC_SUCCESS); 2384 } 2385 2386 /* Set MUMPS options from the options database */ 2387 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2388 2389 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2390 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2391 2392 /* analysis phase */ 2393 mumps->id.job = JOB_FACTSYMBOLIC; 2394 mumps->id.n = M; 2395 switch (mumps->id.ICNTL(18)) { 2396 case 0: /* centralized assembled matrix input */ 2397 if (!mumps->myid) { 2398 mumps->id.nnz = mumps->nnz; 2399 mumps->id.irn = mumps->irn; 2400 mumps->id.jcn = mumps->jcn; 2401 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2402 } 2403 break; 2404 case 3: /* distributed assembled matrix input (size>1) */ 2405 
mumps->id.nnz_loc = mumps->nnz; 2406 mumps->id.irn_loc = mumps->irn; 2407 mumps->id.jcn_loc = mumps->jcn; 2408 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2409 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2410 PetscCall(MatCreateVecs(A, NULL, &b)); 2411 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2412 PetscCall(VecDestroy(&b)); 2413 } 2414 break; 2415 } 2416 PetscMUMPS_c(mumps); 2417 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2418 2419 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2420 F->ops->solve = MatSolve_MUMPS; 2421 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2422 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2423 2424 mumps->matstruc = SAME_NONZERO_PATTERN; 2425 PetscFunctionReturn(PETSC_SUCCESS); 2426 } 2427 2428 /* Note the Petsc r permutation and factor info are ignored */ 2429 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info) 2430 { 2431 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2432 Vec b; 2433 const PetscInt M = A->rmap->N; 2434 2435 PetscFunctionBegin; 2436 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2437 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2438 PetscFunctionReturn(PETSC_SUCCESS); 2439 } 2440 2441 /* Set MUMPS options from the options database */ 2442 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2443 2444 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2445 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2446 2447 /* analysis phase */ 2448 mumps->id.job = JOB_FACTSYMBOLIC; 2449 mumps->id.n = M; 2450 switch (mumps->id.ICNTL(18)) { 2451 case 0: /* centralized assembled matrix input */ 2452 if (!mumps->myid) { 2453 mumps->id.nnz = mumps->nnz; 2454 mumps->id.irn = mumps->irn; 2455 mumps->id.jcn = mumps->jcn; 2456 if (mumps->id.ICNTL(6) > 1) 
mumps->id.a = (MumpsScalar *)mumps->val; 2457 } 2458 break; 2459 case 3: /* distributed assembled matrix input (size>1) */ 2460 mumps->id.nnz_loc = mumps->nnz; 2461 mumps->id.irn_loc = mumps->irn; 2462 mumps->id.jcn_loc = mumps->jcn; 2463 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2464 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2465 PetscCall(MatCreateVecs(A, NULL, &b)); 2466 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2467 PetscCall(VecDestroy(&b)); 2468 } 2469 break; 2470 } 2471 PetscMUMPS_c(mumps); 2472 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2473 2474 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2475 F->ops->solve = MatSolve_MUMPS; 2476 F->ops->solvetranspose = MatSolve_MUMPS; 2477 F->ops->matsolve = MatMatSolve_MUMPS; 2478 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2479 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2480 #if defined(PETSC_USE_COMPLEX) 2481 F->ops->getinertia = NULL; 2482 #else 2483 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2484 #endif 2485 2486 mumps->matstruc = SAME_NONZERO_PATTERN; 2487 PetscFunctionReturn(PETSC_SUCCESS); 2488 } 2489 2490 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2491 { 2492 PetscBool iascii; 2493 PetscViewerFormat format; 2494 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2495 2496 PetscFunctionBegin; 2497 /* check if matrix is mumps type */ 2498 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2499 2500 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2501 if (iascii) { 2502 PetscCall(PetscViewerGetFormat(viewer, &format)); 2503 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2504 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2505 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2506 
PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2507 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2508 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2509 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2510 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2511 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2512 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2513 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2514 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2515 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2516 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2517 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2518 if (mumps->id.ICNTL(11) > 0) { 2519 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)mumps->id.RINFOG(4))); 2520 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)mumps->id.RINFOG(5))); 2521 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)mumps->id.RINFOG(6))); 2522 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)mumps->id.RINFOG(7), (double)mumps->id.RINFOG(8))); 2523 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", (double)mumps->id.RINFOG(9))); 2524 PetscCall(PetscViewerASCIIPrintf(viewer, " 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)mumps->id.RINFOG(10), (double)mumps->id.RINFOG(11))); 2525 } 2526 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2527 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2528 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2529 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2530 /* ICNTL(15-17) not used */ 2531 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2532 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2533 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2534 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2535 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2536 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2537 2538 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2539 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2540 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2541 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2542 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2543 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2544 2545 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2546 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2547 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2548 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2549 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2550 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2551 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48))); 2552 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2553 2554 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)mumps->id.CNTL(1))); 2555 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)mumps->id.CNTL(2))); 2556 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)mumps->id.CNTL(3))); 2557 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)mumps->id.CNTL(4))); 2558 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)mumps->id.CNTL(5))); 2559 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)mumps->id.CNTL(7))); 2560 2561 /* information local to each processor */ 2562 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the 
elimination after analysis):\n")); 2563 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2564 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(1))); 2565 PetscCall(PetscViewerFlush(viewer)); 2566 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2567 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(2))); 2568 PetscCall(PetscViewerFlush(viewer)); 2569 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2570 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(3))); 2571 PetscCall(PetscViewerFlush(viewer)); 2572 2573 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2574 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2575 PetscCall(PetscViewerFlush(viewer)); 2576 2577 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2578 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2579 PetscCall(PetscViewerFlush(viewer)); 2580 2581 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2582 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2583 PetscCall(PetscViewerFlush(viewer)); 2584 2585 if (mumps->ninfo && mumps->ninfo <= 80) { 2586 PetscInt i; 2587 for (i = 0; i < mumps->ninfo; i++) { 2588 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2589 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, 
mumps->id.INFO(mumps->info[i]))); 2590 PetscCall(PetscViewerFlush(viewer)); 2591 } 2592 } 2593 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2594 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : "")); 2595 2596 if (mumps->myid == 0) { /* information from the host */ 2597 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)mumps->id.RINFOG(1))); 2598 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)mumps->id.RINFOG(2))); 2599 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)mumps->id.RINFOG(3))); 2600 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)mumps->id.RINFOG(12), (double)mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2601 2602 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2603 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2604 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2605 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2606 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2607 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2608 
PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2609 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2610 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2611 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2612 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2613 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2614 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2615 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2616 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2617 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2618 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2619 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 2620 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of 
memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2621 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2622 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2623 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2624 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2625 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2626 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2627 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2628 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2629 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2630 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2631 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2632 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on 
the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2633 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2634 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2635 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2636 } 2637 } 2638 } 2639 PetscFunctionReturn(PETSC_SUCCESS); 2640 } 2641 2642 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info) 2643 { 2644 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2645 2646 PetscFunctionBegin; 2647 info->block_size = 1.0; 2648 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2649 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  /* the remaining MatInfo fields are not tracked by the MUMPS interface */
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Register the index set that defines the Schur complement block: allocate the dense
   Schur matrix F->schur and hand its storage and the (1-based, Fortran-style) variable
   list to MUMPS through mumps->id so MUMPS writes the Schur complement in place */
static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  mumps->id.schur      = (MumpsScalar *)arr; /* MUMPS fills F->schur's storage directly */
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size;
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

  /* MUMPS expects Fortran style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the Schur complement computed by MUMPS (layout selected by ICNTL(19)) into a new
   column-major SeqDense matrix *S; symmetric layouts are expanded to a full matrix */
static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[j * N + i] = val; /* transpose row-major MUMPS data into column-major storage */
        }
      }
    } else { /* stored by columns */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    }
  } else {                          /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = array[j * N + i] = val; /* mirror to the upper triangle */
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = array[j * N + i] = val; /* mirror to the upper triangle */
        }
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set ICNTL(icntl) = ival; before the first PetscMUMPS_c() call the value is cached in
   ICNTL_pre, laid out as [count, icntl_1, ival_1, icntl_2, ival_2, ...] */
static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) {                             /* not already cached */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Get ICNTL(icntl); before the first PetscMUMPS_c() call the cached value (or 0) is returned */
static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ?
mumps->ICNTL_pre[0] : 0;
    *ival = 0; /* default when icntl was never cached */
    for (i = 0; i < nICNTL_pre; ++i) {
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
    }
  } else *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
. icntl - index of MUMPS parameter array ICNTL()
- ival  - value of MUMPS ICNTL(icntl)

  Options Database Key:
. -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveInt(F, ival, 3);
  /* only ICNTL 1..38, 48 and 58 are supported by this interface */
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array ICNTL()

  Output Parameter:
. ival - value of MUMPS ICNTL(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(ival, 3);
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set CNTL(icntl) = val; before the first PetscMUMPS_c() call the value is cached in
   CNTL_pre, laid out as [count, icntl_1, val_1, icntl_2, val_2, ...] (stored as PetscReal) */
static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    for (i = 0; i < nCNTL_pre; ++i)
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
    if (i == nCNTL_pre) {
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
      mumps->CNTL_pre[0]++;
    }
    mumps->CNTL_pre[1 + 2 * i] = icntl;
    mumps->CNTL_pre[2 + 2 * i] = val;
  } else mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Get CNTL(icntl); before the first PetscMUMPS_c() call the cached value (or 0.0) is returned */
static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ?
mumps->CNTL_pre[0] : 0;
    *val = 0.0; /* default when icntl was never cached */
    for (i = 0; i < nCNTL_pre; ++i) {
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
    }
  } else *val = mumps->id.CNTL(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
. icntl - index of MUMPS parameter array CNTL()
- val   - value of MUMPS CNTL(icntl)

  Options Database Key:
. -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveReal(F, val, 3);
  /* only CNTL 1..7 are supported by this interface */
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array CNTL()

  Output Parameter:
. val - value of MUMPS CNTL(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Accessors that read the raw MUMPS statistics arrays INFO/INFOG/RINFO/RINFOG */
static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfo = mumps->id.RINFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfog = mumps->id.RINFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode
MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
  *size  = 0;
  *array = NULL;
  if (!mumps->myid) { /* only the host rank holds the null pivot list */
    *size = mumps->id.INFOG(28); /* INFOG(28): number of null pivots detected (see MUMPS manual) */
    PetscCall(PetscMalloc1(*size, array));
    /* convert MUMPS 1-based pivot indices to PETSc 0-based indexing; caller frees *array */
    for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute the entries of inv(A) selected by the sparsity of spRHS (MUMPS ICNTL(30) feature);
   spRHS must wrap a sequential AIJ matrix in MATTRANSPOSEVIRTUAL form */
static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
{
  Mat          Bt = NULL, Btseq = NULL;
  PetscBool    flg;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *aa;
  PetscInt     spnr, *ia, *ja, M, nrhs;

  PetscFunctionBegin;
  PetscAssertPointer(spRHS, 2);
  PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
  if (flg) {
    PetscCall(MatTransposeGetMat(spRHS, &Bt));
  } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");

  /* ICNTL(30) = 1: compute user-specified set of entries in inv(A) */
  PetscCall(MatMumpsSetIcntl(F, 30, 1));

  if (mumps->petsc_size > 1) {
    Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
    Btseq = b->A; /* the diagonal (sequential) block holds the requested pattern */
  } else {
    Btseq = Bt;
  }

  PetscCall(MatGetSize(spRHS, &M, &nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (!mumps->myid) { /* MUMPS expects the sparse RHS centralized on the host */
    PetscCall(MatSeqAIJGetArray(Btseq, &aa));
    /* shift = 1: MUMPS wants Fortran-style 1-based CSR indices */
    PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
    PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
    mumps->id.rhs_sparse = (MumpsScalar *)aa;
  } else {
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    mumps->id.rhs_sparse  = NULL;
  }
  mumps->id.ICNTL(20) = 1; /* rhs is sparse */
  mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  if (!mumps->myid) {
    PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
    PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameter:
. F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface

  Output Parameter:
.
spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
@*/
PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Same as MatMumpsGetInverse_MUMPS() but for entries of inv(A^T): wrap spRHST in a
   transpose-virtual matrix and reuse the inverse path */
static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
{
  Mat spRHS;

  PetscFunctionBegin;
  PetscCall(MatCreateTranspose(spRHST, &spRHS));
  PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
  PetscCall(MatDestroy(&spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameter:
. F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface

  Output Parameter:
. spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
@*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
{
  PetscBool flg;

  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");

  PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array INFO()

  Output Parameter:
. ival - value of MUMPS INFO(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array INFOG()

  Output Parameter:
.
ival - value of MUMPS INFOG(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array RINFO()

  Output Parameter:
. val - value of MUMPS RINFO(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array RINFOG()

  Output Parameter:
. val - value of MUMPS RINFOG(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameter:
. F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface

  Output Parameters:
+ size  - local size of the array. The size of the array is non-zero only on the host.
- array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
          for freeing this array.

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
@*/
PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(size, 2);
  PetscAssertPointer(array, 3);
  PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
  distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>

  Works with `MATAIJ` and `MATSBAIJ` matrices

  Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

  Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
  See details below.

  Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver

  Options Database Keys:
+ -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
.
-mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 3258 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3259 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3260 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3261 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3262 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3263 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3264 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3265 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3266 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3267 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3268 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3269 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3270 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3271 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3272 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3273 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3274 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3275 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3276 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3277 . 
-mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 3278 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3279 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3280 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3281 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3282 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3283 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3284 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3285 . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism 3286 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3287 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3288 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3289 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3290 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3291 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3292 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3293 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3294 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3295 3296 Level: beginner 3297 3298 Notes: 3299 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will 3300 error if the matrix is Hermitian. 3301 3302 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. 
Otherwise one can set the options prefix by calling
  `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

  When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
  the failure with
.vb
          KSPGetPC(ksp,&pc);
          PCFactorGetMatrix(pc,&mat);
          MatMumpsGetInfo(mat,....);
          MatMumpsGetInfog(mat,....); etc.
.ve
  Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.

  MUMPS provides 64-bit integer support in two build modes:
  full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
  requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

  selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
  MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
  columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
  integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

  Two modes to run MUMPS/PETSc with OpenMP
.vb
     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
.ve

.vb
     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
.ve

   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
   (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
   (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
   libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
   processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
   by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
   In a multi-socket compute node, MPI rank mapping is an issue.
Still use the above example and suppose your compute node has two sockets, 3349 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3350 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3351 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3352 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3353 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3354 examine the mapping result. 3355 3356 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3357 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3358 calls `omp_set_num_threads`(m) internally before calling MUMPS. 
3359 3360 See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 3361 3362 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3363 M*/ 3364 3365 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 3366 { 3367 PetscFunctionBegin; 3368 *type = MATSOLVERMUMPS; 3369 PetscFunctionReturn(PETSC_SUCCESS); 3370 } 3371 3372 /* MatGetFactor for Seq and MPI AIJ matrices */ 3373 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3374 { 3375 Mat B; 3376 Mat_MUMPS *mumps; 3377 PetscBool isSeqAIJ, isDiag, isDense; 3378 PetscMPIInt size; 3379 3380 PetscFunctionBegin; 3381 #if defined(PETSC_USE_COMPLEX) 3382 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3383 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3384 *F = NULL; 3385 PetscFunctionReturn(PETSC_SUCCESS); 3386 } 3387 #endif 3388 /* Create the factorization matrix */ 3389 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3390 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3391 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3392 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3393 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3394 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3395 PetscCall(MatSetUp(B)); 3396 3397 PetscCall(PetscNew(&mumps)); 3398 3399 B->ops->view = MatView_MUMPS; 3400 B->ops->getinfo = MatGetInfo_MUMPS; 3401 3402 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3403 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3404 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3405 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3406 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3407 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3408 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3409 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3410 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3411 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3412 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", 
MatMumpsGetRinfog_MUMPS)); 3413 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3414 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3415 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3416 3417 if (ftype == MAT_FACTOR_LU) { 3418 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3419 B->factortype = MAT_FACTOR_LU; 3420 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3421 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3422 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3423 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3424 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3425 mumps->sym = 0; 3426 } else { 3427 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3428 B->factortype = MAT_FACTOR_CHOLESKY; 3429 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3430 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3431 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3432 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3433 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3434 #if defined(PETSC_USE_COMPLEX) 3435 mumps->sym = 2; 3436 #else 3437 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3438 else mumps->sym = 2; 3439 #endif 3440 } 3441 3442 /* set solvertype */ 3443 PetscCall(PetscFree(B->solvertype)); 3444 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3445 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3446 if (size == 1) { 3447 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed 
into symbolic factorization */ 3448 B->canuseordering = PETSC_TRUE; 3449 } 3450 B->ops->destroy = MatDestroy_MUMPS; 3451 B->data = (void *)mumps; 3452 3453 *F = B; 3454 mumps->id.job = JOB_NULL; 3455 mumps->ICNTL_pre = NULL; 3456 mumps->CNTL_pre = NULL; 3457 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3458 PetscFunctionReturn(PETSC_SUCCESS); 3459 } 3460 3461 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3462 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 3463 { 3464 Mat B; 3465 Mat_MUMPS *mumps; 3466 PetscBool isSeqSBAIJ; 3467 PetscMPIInt size; 3468 3469 PetscFunctionBegin; 3470 #if defined(PETSC_USE_COMPLEX) 3471 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3472 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3473 *F = NULL; 3474 PetscFunctionReturn(PETSC_SUCCESS); 3475 } 3476 #endif 3477 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3478 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3479 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3480 PetscCall(MatSetUp(B)); 3481 3482 PetscCall(PetscNew(&mumps)); 3483 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3484 if (isSeqSBAIJ) { 3485 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3486 } else { 3487 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3488 } 3489 3490 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3491 B->ops->view = MatView_MUMPS; 3492 B->ops->getinfo = MatGetInfo_MUMPS; 3493 3494 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3495 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3496 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3497 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3498 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3499 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3500 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3501 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3502 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3503 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3504 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3505 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3506 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3507 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3508 3509 B->factortype = MAT_FACTOR_CHOLESKY; 3510 #if defined(PETSC_USE_COMPLEX) 3511 mumps->sym = 2; 3512 #else 3513 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3514 else mumps->sym = 2; 3515 #endif 3516 3517 /* set solvertype */ 3518 PetscCall(PetscFree(B->solvertype)); 3519 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3520 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3521 if (size == 1) { 3522 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3523 B->canuseordering = PETSC_TRUE; 3524 } 3525 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3526 
B->ops->destroy = MatDestroy_MUMPS; 3527 B->data = (void *)mumps; 3528 3529 *F = B; 3530 mumps->id.job = JOB_NULL; 3531 mumps->ICNTL_pre = NULL; 3532 mumps->CNTL_pre = NULL; 3533 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3534 PetscFunctionReturn(PETSC_SUCCESS); 3535 } 3536 3537 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3538 { 3539 Mat B; 3540 Mat_MUMPS *mumps; 3541 PetscBool isSeqBAIJ; 3542 PetscMPIInt size; 3543 3544 PetscFunctionBegin; 3545 /* Create the factorization matrix */ 3546 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3547 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3548 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3549 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3550 PetscCall(MatSetUp(B)); 3551 3552 PetscCall(PetscNew(&mumps)); 3553 if (ftype == MAT_FACTOR_LU) { 3554 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3555 B->factortype = MAT_FACTOR_LU; 3556 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3557 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3558 mumps->sym = 0; 3559 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3560 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3561 3562 B->ops->view = MatView_MUMPS; 3563 B->ops->getinfo = MatGetInfo_MUMPS; 3564 3565 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3566 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3567 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3568 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", 
MatMumpsSetIcntl_MUMPS)); 3569 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3570 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3571 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3572 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3573 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3574 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3575 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3576 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3577 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3578 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3579 3580 /* set solvertype */ 3581 PetscCall(PetscFree(B->solvertype)); 3582 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3583 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3584 if (size == 1) { 3585 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3586 B->canuseordering = PETSC_TRUE; 3587 } 3588 B->ops->destroy = MatDestroy_MUMPS; 3589 B->data = (void *)mumps; 3590 3591 *F = B; 3592 mumps->id.job = JOB_NULL; 3593 mumps->ICNTL_pre = NULL; 3594 mumps->CNTL_pre = NULL; 3595 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3596 PetscFunctionReturn(PETSC_SUCCESS); 3597 } 3598 3599 /* MatGetFactor for Seq and MPI SELL matrices */ 3600 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3601 { 3602 Mat B; 3603 Mat_MUMPS *mumps; 3604 PetscBool 
isSeqSELL; 3605 PetscMPIInt size; 3606 3607 PetscFunctionBegin; 3608 /* Create the factorization matrix */ 3609 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3610 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3611 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3612 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3613 PetscCall(MatSetUp(B)); 3614 3615 PetscCall(PetscNew(&mumps)); 3616 3617 B->ops->view = MatView_MUMPS; 3618 B->ops->getinfo = MatGetInfo_MUMPS; 3619 3620 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3621 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3622 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3623 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3624 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3625 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3626 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3627 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3628 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3629 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3630 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3631 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3632 3633 if (ftype == MAT_FACTOR_LU) { 3634 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3635 
B->factortype = MAT_FACTOR_LU; 3636 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3637 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3638 mumps->sym = 0; 3639 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3640 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3641 3642 /* set solvertype */ 3643 PetscCall(PetscFree(B->solvertype)); 3644 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3645 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3646 if (size == 1) { 3647 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3648 B->canuseordering = PETSC_TRUE; 3649 } 3650 B->ops->destroy = MatDestroy_MUMPS; 3651 B->data = (void *)mumps; 3652 3653 *F = B; 3654 mumps->id.job = JOB_NULL; 3655 mumps->ICNTL_pre = NULL; 3656 mumps->CNTL_pre = NULL; 3657 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3658 PetscFunctionReturn(PETSC_SUCCESS); 3659 } 3660 3661 /* MatGetFactor for MATNEST matrices */ 3662 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3663 { 3664 Mat B, **mats; 3665 Mat_MUMPS *mumps; 3666 PetscInt nr, nc; 3667 PetscMPIInt size; 3668 PetscBool flg = PETSC_TRUE; 3669 3670 PetscFunctionBegin; 3671 #if defined(PETSC_USE_COMPLEX) 3672 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3673 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3674 *F = NULL; 3675 PetscFunctionReturn(PETSC_SUCCESS); 3676 } 3677 #endif 3678 3679 /* Return if some condition is not satisfied */ 3680 *F = NULL; 3681 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3682 if (ftype == MAT_FACTOR_CHOLESKY) { 3683 IS *rows, *cols; 3684 PetscInt *m, *M; 3685 3686 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3687 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3688 PetscCall(MatNestGetISs(A, rows, cols)); 3689 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3690 if (!flg) { 3691 PetscCall(PetscFree2(rows, cols)); 3692 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 3693 PetscFunctionReturn(PETSC_SUCCESS); 3694 } 3695 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3696 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3697 for (PetscInt r = 0; flg && r < nr; r++) 3698 for (PetscInt k = r + 1; flg && k < nr; k++) 3699 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3700 PetscCall(PetscFree2(m, M)); 3701 PetscCall(PetscFree2(rows, cols)); 3702 if (!flg) { 3703 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. 
Use MAT_FACTOR_LU.\n")); 3704 PetscFunctionReturn(PETSC_SUCCESS); 3705 } 3706 } 3707 3708 for (PetscInt r = 0; r < nr; r++) { 3709 for (PetscInt c = 0; c < nc; c++) { 3710 Mat sub = mats[r][c]; 3711 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag, isDense; 3712 3713 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3714 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 3715 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 3716 else { 3717 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 3718 if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 3719 } 3720 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3721 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3722 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3723 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3724 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3725 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3726 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 3727 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3728 if (ftype == MAT_FACTOR_CHOLESKY) { 3729 if (r == c) { 3730 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) { 3731 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3732 flg = PETSC_FALSE; 3733 } 3734 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 3735 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3736 flg = 
PETSC_FALSE; 3737 } 3738 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 3739 PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3740 flg = PETSC_FALSE; 3741 } 3742 } 3743 } 3744 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3745 3746 /* Create the factorization matrix */ 3747 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3748 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3749 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3750 PetscCall(MatSetUp(B)); 3751 3752 PetscCall(PetscNew(&mumps)); 3753 3754 B->ops->view = MatView_MUMPS; 3755 B->ops->getinfo = MatGetInfo_MUMPS; 3756 3757 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3758 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3759 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3760 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3761 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3762 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3763 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3764 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3765 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3766 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3767 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3768 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3769 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3770 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3771 3772 if (ftype == MAT_FACTOR_LU) { 3773 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3774 B->factortype = MAT_FACTOR_LU; 3775 mumps->sym = 0; 3776 } else { 3777 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3778 B->factortype = MAT_FACTOR_CHOLESKY; 3779 #if defined(PETSC_USE_COMPLEX) 3780 mumps->sym = 2; 3781 #else 3782 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3783 else mumps->sym = 2; 3784 #endif 3785 } 3786 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3787 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3788 3789 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3790 if (size == 1) { 3791 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3792 B->canuseordering = PETSC_TRUE; 3793 } 3794 3795 /* set solvertype */ 3796 PetscCall(PetscFree(B->solvertype)); 3797 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3798 B->ops->destroy = MatDestroy_MUMPS; 3799 B->data = (void *)mumps; 3800 3801 *F = B; 3802 mumps->id.job = JOB_NULL; 3803 mumps->ICNTL_pre = NULL; 3804 mumps->CNTL_pre = NULL; 3805 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3806 PetscFunctionReturn(PETSC_SUCCESS); 3807 } 3808 3809 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3810 { 3811 PetscFunctionBegin; 3812 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3813 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3814 
PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3815 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3816 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3817 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3818 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3819 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3820 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3821 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3822 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3823 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3824 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3825 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3826 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3827 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3828 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3829 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3830 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 3831 PetscFunctionReturn(PETSC_SUCCESS); 3832 } 3833