/*
   Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <cmumps_c.h>
  #else
    #include <zmumps_c.h>
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <smumps_c.h>
  #else
    #include <dmumps_c.h>
  #endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c cmumps_c
  #else
    #define MUMPS_c zmumps_c
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c smumps_c
  #else
    #define MUMPS_c dmumps_c
  #endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
   numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in the MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro once used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is a row/col index or some small integer */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;
  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
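
/* Illustrative sketch (not part of the build): PetscMUMPSIntCast() is the narrowing path used
   throughout this file before storing a PetscInt into a 32-bit MUMPS field. With 64-bit indices
   it errors out rather than silently truncating. A hypothetical caller would look like:

     PetscMUMPSInt n32;
     PetscCall(PetscMUMPSIntCast(A->rmap->N, &n32)); // fails cleanly if N > 2^31-1
     mumps->id.n = n32;
*/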
/* If using PETSc's OpenMP support, we only call MUMPS on master ranks.
   Before/after the call, we change/restore the set of CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
           to each process, so we only Bcast info[1], an error code, and leave the others alone (they do not have \
           an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82. \
           omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
        */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif
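
/* Typical call pattern for the wrapper above (a sketch based on how this file uses it):

     mumps->id.job = JOB_SOLVE;
     PetscMUMPS_c(mumps); // runs MUMPS on the master ranks only when OpenMP support is enabled
     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d", mumps->id.INFOG(1));

   Every MUMPS phase (JOB_INIT, JOB_FACTSYMBOLIC, JOB_FACTNUMERIC, JOB_SOLVE, JOB_END) goes
   through this macro so the floating-point-trap and OpenMP handling is applied uniformly.
*/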

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MumpsScalar mumps_complex
  #else
    #define MumpsScalar mumps_double_complex
  #endif
#else
  #define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]
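
/* These macros shift C's 0-based arrays so code can be written against the 1-based parameter
   numbers of the MUMPS manual. For example (illustrative):

     mumps->id.ICNTL(24) = 1;  // expands to mumps->id.icntl[23] = 1, i.e. manual entry ICNTL(24)
*/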

typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
  #else
  ZMUMPS_STRUC_C id;
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
  #else
  DMUMPS_STRUC_C id;
  #endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer, hence val_alloc. */
  PetscInt64     nnz;             /* number of nonzeros. The type is called "selective 64-bit" in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) has changed since the previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO entries to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for the sparse rhs */
  PetscInt64     cur_ilen, cur_jlen;  /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller through which blocked processes release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is the petsc matrix's comm */
  PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master;        /* is this rank the master of omp_comm? */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] is the column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for the rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
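
/* Example of the 1-based CSR layout this routine expects (illustrative, not part of the build).
   The 2x2 matrix [1 2; 0 3] stored with Fortran-style indices is

     nrow = 2;  ia[] = {1, 3, 4};  // ia[nrow] - 1 == 3 == nnz
     ja[]  = {1, 2, 2};

   On 64-bit-index builds the routine copies ia/ja into ia_alloc/ja_alloc after range checks,
   while on 32-bit builds it can alias the input arrays directly.
*/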

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with the rhs in mumps->id.redrhs and return it in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate the MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve the Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A     - matrix in aij, baij or sbaij format
    shift - 0: C style output triples; 1: Fortran style output triples.
    reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
            MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(); in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is only sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/
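
/* Worked example (illustrative): with shift = 1 (Fortran style), the 2x2 AIJ matrix
   [10 0; 20 30] becomes the triples

     r[] = {1, 2, 2};  c[] = {1, 1, 2};  v[] = {10, 20, 30};

   which is the assembled COO (coordinate) format MUMPS consumes through its irn/jcn/a arrays.
*/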

PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  mumps->val = (PetscScalar *)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscInt64     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = a->sliidx[a->totalslices];
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  M          = A->rmap->N / bs;
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai = aa->i;
    aj = aa->j;
    nz = bs2 * aa->nz;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
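
/* Illustrative expansion for the BAIJ case above: a single bs=2 block stored at block position
   (I, J) contributes bs*bs = 4 triples with point indices

     (2*I + m + shift, 2*J + j + shift)  for j, m in {0, 1}

   with j as the outer loop and m as the inner loop, i.e. the block is unrolled one column at a
   time; the values are read verbatim from aa->a in the same order, so no value buffer is needed.
*/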

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscInt64      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool isset, hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscInt64 alloc_size = aa->nz * bs2;

    PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
    if (bs > 1) {
      PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs > 1) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (ajj[j] > i || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
              }
              val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j;
  const PetscScalar *av, *v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
  PetscBool          missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i + 1] - adiag[i];
        }
      }
    } else {
      for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) val[nz++] = v1[j];
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) val[nz++] = v1[j];
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
  PetscInt           bs;
  PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A, &bs));
  rstart = A->rmap->rstart;
  ai     = aa->i;
  aj     = aa->j;
  bi     = bb->i;
  bj     = bb->j;
  av     = aa->a;
  bv     = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i] * bs2;
    v2     = bv + bi[i] * bs2;

    if (bs > 1) {
      /* A-part */
      for (j = 0; j < countA; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (rstart + ajj[j] * bs > irow || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs];
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
            }
            val[jj++] = v2[j * bs2 + m + k * bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j = 0; j < countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  mumps->nnz = jj;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscInt64         rstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)(Ad)->data;
  bb = (Mat_SeqAIJ *)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
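
/* Note on the diagonal/off-diagonal split used above (illustrative): an MPIAIJ matrix stores a
   "diagonal" block Ad whose columns are local, and an "off-diagonal" block Ao whose compressed
   local column indices are translated to global columns through garray. So a local entry
   Ao(i, jloc) lands in the triples as

     row = rstart + i + shift;  col = garray[jloc] + shift;

   which is exactly what the B-part loop does for every off-process column.
*/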

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa    = (Mat_SeqAIJ *)(Ad)->data;
  bb    = (Mat_SeqAIJ *)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i = 0; i < m; i++) {
      nza += (ai[i + 1] - adiag[i]);
      countB = bi[i + 1] - bi[i];
      bjj    = bj + bi[i];
      for (j = 0; j < countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of the upper triangular part of mat */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i + 1] - adiag[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to the start of the RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar *)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* lazily initialize the omp stuff for the distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* realloc buffers when the current nrhs is bigger than any we have met before */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* set up recvcounts[], disps[] and recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* directly use rhs_loc[] as recvbuf; a single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}
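
/* Layout sketch for the distributed-RHS fields set above (illustrative): with two PETSc ranks
   owning rows [0,3) and [3,5), rank 0 gets irhs_loc = {1,2,3} and rank 1 gets {4,5} (1-based),
   with nloc_rhs = lrhs_loc = the local row count. When PETSc's OpenMP support maps several MPI
   ranks onto one omp_comm, the master additionally gathers the slave rows so that its irhs_loc
   covers every rank in the communicator.
*/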

PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
                                   "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
                                   &cite2));

  if (A->factorerrortype) {
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscCall(VecSetInf(x));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b, &rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq, &array));
        mumps->id.rhs = (MumpsScalar *)array;
      }
    }
  } else { /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    PetscCall(VecCopy(b, x));
    PetscCall(VecGetArray(x, &array));
    mumps->id.rhs = (MumpsScalar *)array;
  }

  /*
     Handle the condensation step of the Schur complement (if any).
     We set ICNTL(26) == -1 by default when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any other value may be harmful during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    second_solve = PETSC_TRUE;
    PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }
  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));

  /* handle the expansion step of the Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));

  if (mumps->petsc_size > 1) { /* convert mumps's distributed solution to the petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of the local solution may change (though not its size lsol_loc), so recreate scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                  /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save the current value of id.ICNTL(9) */
    }

    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
  }

  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      PetscCall(VecRestoreArrayRead(b, &rarray));
    } else if (!mumps->myid) {
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else PetscCall(VecRestoreArray(x, &array));

  PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
  const PetscMUMPSInt value = mumps->id.ICNTL(9);

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatSolve_MUMPS(A, b, x));
  mumps->id.ICNTL(9) = value;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL;
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save;
  PetscScalar       *bray, *sol_loc, *sol_loc_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc;
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
    if (flgT) { /* input B is the transpose of the actual RHS matrix, because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      PetscCall(MatTransposeGetMat(B, &Bt));
    } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B, &M, &nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt     spnr, *ia, *ja;
    PetscBool    second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X, &array));
    mumps->id.rhs = (MumpsScalar *)array;

    if (denseB) {
      /* copy B to X */
      PetscCall(MatDenseGetArrayRead(B, &rbray));
      PetscCall(PetscArraycpy(array, rbray, M * nrhs));
      PetscCall(MatDenseRestoreArrayRead(B, &rbray));
    } else { /* sparse B */
      PetscCall(MatSeqAIJGetArray(Bt, &aa));
      PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar *)aa;
    }
    /* handle the condensation step of the Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
    }
    /* solve phase */
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));

    /* handle the expansion step of the Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
    if (!denseB) { /* sparse B */
      PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
      PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
    PetscCall(MatDenseRestoreArray(X, &array));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* parallel case: MUMPS requires the rhs B to be centralized on the host! */
  PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold the mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
  mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B, &rbray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
      PetscCall(MatDenseRestoreArrayRead(B, &rbray));
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of the non-contiguous indices, the created vecscatter scat_rhs is not done as an MPI_Gather, resulting in
         very inefficient communication. An optimization is to use VecScatterCreateToZero() to gather B to rank 0. Then on rank
         0, re-arrange B into the desired order, which is a local operation.
      */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports a centralized rhs */
      /* wrap the dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(MatDenseGetArray(B, &bray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
      PetscCall(MatDenseRestoreArray(B, &bray));

      /* scatter v_mpi to b_seq in proc[0]. MUMPS requires the rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from the k-th index of v_mpi to the (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhs * M, &idx));
        PetscCall(MatGetOwnershipRanges(B, &rstart));
        k = 0;
        for (proc = 0; proc < mumps->petsc_size; proc++) {
          for (j = 0; j < nrhs; j++) {
            for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
      } else {
        PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
      }
      PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

      if (!mumps->myid) { /* define the rhs on the host */
        PetscCall(VecGetArray(b_seq, &bray));
        mumps->id.rhs = (MumpsScalar *)bray;
        PetscCall(VecRestoreArray(b_seq, &bray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ *)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X, &m, NULL));
    PetscCall(MatDenseGetArray(X, &bray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
    PetscCall(MatDenseRestoreArray(X, &bray));

    if (!mumps->myid) {
      PetscCall(MatSeqAIJGetArray(b->A, &aa));
      PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar *)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));

  /* scatter the mumps distributed solution to the petsc vector v_mpi, which shares local arrays with the solution matrix X */
  PetscCall(MatDenseGetArray(X, &array));
  PetscCall(VecPlaceArray(v_mpi, array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X, &rstart));
  /* iidx: index for scattering the mumps solution to petsc X */
  PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
  PetscCall(PetscMalloc1(nlsol_loc, &idxx));
  for (i = 0; i < lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in the j-th vector */

    for (proc = 0; proc < mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;          /* local index on the 1st column of the petsc vector X */
        iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to the petsc index in X */
        m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
  PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
  PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X, &array));

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  PetscCall(PetscFree2(sol_loc, isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ *)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
      PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
{
  Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
  const PetscMUMPSInt value = mumps->id.ICNTL(9);

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatMatSolve_MUMPS(A, B, X));
  mumps->id.ICNTL(9) = value;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
{
  PetscBool flg;
  Mat       B;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");

  /* create B = Bt^T that uses Bt's data structure */
  PetscCall(MatCreateTranspose(Bt, &B));

  PetscCall(MatMatSolve_MUMPS(A, B, X));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
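
/* Usage sketch for the sparse-RHS path (illustrative): because MUMPS wants sparse right-hand
   sides in compressed-column form, a caller holding the right-hand sides as the rows of an AIJ
   matrix Bt can solve A * X = Bt^T directly:

     PetscCall(MatMatTransposeSolve(F, Bt, X)); // F is the MUMPS factor; dispatches here

   Internally a MATTRANSPOSEVIRTUAL wrapper is created so that MatMatSolve_MUMPS() can reuse
   Bt's row-oriented (CSR) storage as the column-oriented storage of B without copying.
*/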

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
    F:     numeric factor
  output:
    nneg:  total number of negative pivots
    nzero: total number of zero pivots
    npos:  (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
{
  Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility
     to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
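
/* Illustrative option setup for obtaining a correct parallel inertia from the checks above:

     -mat_mumps_icntl_13 1   (skip ScaLAPACK on the root node so INFOG(12) is exact)
     -mat_mumps_icntl_24 1   (enable null-pivot detection so INFOG(28) counts zero pivots)

   nneg/nzero/npos are then read off INFOG(12), INFOG(28) and the global dimension.
*/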
to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1535   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1536
1537   if (nneg) *nneg = mumps->id.INFOG(12);
1538   if (nzero || npos) {
1539     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1540     if (nzero) *nzero = mumps->id.INFOG(28);
1541     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1542   }
1543   PetscFunctionReturn(PETSC_SUCCESS);
1544 }
1545 #endif
1546
1547 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1548 {
1549   PetscInt i, nreqs;
1550   PetscMUMPSInt *irn, *jcn;
1551   PetscMPIInt count;
1552   PetscInt64 totnnz, remain;
1553   const PetscInt osize = mumps->omp_comm_size;
1554   PetscScalar *val;
1555
1556   PetscFunctionBegin;
1557   if (osize > 1) {
1558     if (reuse == MAT_INITIAL_MATRIX) {
1559       /* master first gathers counts of nonzeros to receive */
1560       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1561       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1562
1563       /* Then each computes the number of send/recvs */
1564       if (mumps->is_omp_master) {
1565         /* Start from 1 since self communication is not done in MPI */
1566         nreqs = 0;
1567         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1568       } else {
1569         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1570       }
1571       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1572
1573       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1574          MPI_Gatherv would suffice if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1575          might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
1576          is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
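         As a hypothetical worked illustration (the numbers are not from this code): gathering nnz = 5e9
         entries from one rank needs ceil(5e9 / (2^31-1)) = 3 messages per array, so that rank accounts
         for 3 x 3 = 9 of the requests allocated above, since irn, jcn and val travel separately.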
1577 */ 1578 nreqs = 0; /* counter for actual send/recvs */ 1579 if (mumps->is_omp_master) { 1580 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1581 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1582 PetscCall(PetscMalloc1(totnnz, &val)); 1583 1584 /* Self communication */ 1585 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1586 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1587 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1588 1589 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1590 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1591 PetscCall(PetscFree(mumps->val_alloc)); 1592 mumps->nnz = totnnz; 1593 mumps->irn = irn; 1594 mumps->jcn = jcn; 1595 mumps->val = mumps->val_alloc = val; 1596 1597 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1598 jcn += mumps->recvcount[0]; 1599 val += mumps->recvcount[0]; 1600 1601 /* Remote communication */ 1602 for (i = 1; i < osize; i++) { 1603 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1604 remain = mumps->recvcount[i] - count; 1605 while (count > 0) { 1606 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1607 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1608 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1609 irn += count; 1610 jcn += count; 1611 val += count; 1612 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1613 remain -= count; 1614 } 1615 } 1616 } else { 1617 irn = mumps->irn; 1618 jcn = mumps->jcn; 1619 val = mumps->val; 1620 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1621 remain = mumps->nnz - count; 1622 while (count > 0) { 1623 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1624 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1625 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1626 irn += count; 1627 jcn += count; 1628 val += count; 1629 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1630 remain -= count; 1631 } 1632 } 1633 } else { 1634 nreqs = 0; 1635 if (mumps->is_omp_master) { 1636 val = mumps->val + mumps->recvcount[0]; 1637 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1638 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1639 remain = mumps->recvcount[i] - count; 1640 while (count > 0) { 1641 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1642 val += count; 1643 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1644 remain -= count; 1645 } 1646 } 1647 } else { 1648 val = mumps->val; 1649 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1650 remain = mumps->nnz - count; 1651 while (count > 0) { 1652 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1653 val += count; 1654 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1655 remain -= count; 1656 } 1657 } 1658 } 1659 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1660 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1661 } 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const 
MatFactorInfo *info)
1666 {
1667   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1668   PetscBool isMPIAIJ;
1669
1670   PetscFunctionBegin;
1671   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
1672     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1673     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1674     PetscFunctionReturn(PETSC_SUCCESS);
1675   }
1676
1677   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1678   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1679
1680   /* numerical factorization phase */
1681   mumps->id.job = JOB_FACTNUMERIC;
1682   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1683     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1684   } else {
1685     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1686   }
1687   PetscMUMPS_c(mumps);
1688   if (mumps->id.INFOG(1) < 0) {
1689     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1690     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1691       PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1692       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1693     } else if (mumps->id.INFOG(1) == -13) {
1694       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1695       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1696     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1697       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1698       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1699     } else {
1700       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1701       F->factorerrortype = MAT_FACTOR_OTHER;
1702     }
1703   }
1704   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " mumps->id.ICNTL(16):=%d", mumps->id.ICNTL(16));
1705
1706   F->assembled = PETSC_TRUE;
1707
1708   if (F->schur) { /* reset Schur status to unfactored */
1709 #if defined(PETSC_HAVE_CUDA)
1710     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1711 #endif
1712     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1713       mumps->id.ICNTL(19) = 2;
1714       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1715     }
1716     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1717   }
1718
1719   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
1720   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1721
1722   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1723   if (mumps->petsc_size > 1) {
1724     PetscInt lsol_loc;
1725     PetscScalar *sol_loc;
1726
1727     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1728
1729     /* distributed solution; Create x_seq=sol_loc for repeated use */
1730     if (mumps->x_seq) {
1731
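      /* a previous numeric factorization created these objects; free them here because the
         length of the local solution, INFO(23), may differ for the new factors */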
PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1732 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1733 PetscCall(VecDestroy(&mumps->x_seq)); 1734 } 1735 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1736 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1737 mumps->id.lsol_loc = lsol_loc; 1738 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1739 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1740 } 1741 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1742 PetscFunctionReturn(PETSC_SUCCESS); 1743 } 1744 1745 /* Sets MUMPS options from the options database */ 1746 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1747 { 1748 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1749 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1750 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1751 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1752 MumpsScalar *arr; 1753 1754 PetscFunctionBegin; 1755 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1756 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1757 PetscInt nthreads = 0; 1758 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1759 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1760 1761 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1762 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1763 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1764 1765 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1766 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1767 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1768 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1769 if (mumps->use_petsc_omp_support) { 1770 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 1771 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1772 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1773 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1774 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1775 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1776 #endif 1777 } else { 1778 mumps->omp_comm = PETSC_COMM_SELF; 1779 mumps->mumps_comm = mumps->petsc_comm; 1780 mumps->is_omp_master = PETSC_TRUE; 1781 } 1782 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1783 mumps->reqs = NULL; 1784 mumps->tag = 0; 1785 1786 if (mumps->mumps_comm != MPI_COMM_NULL) { 1787 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1788 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
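         Otherwise a receive that MUMPS posts internally could match an unrelated message that PETSc or
         the application posts with an equal tag on the same communicator (and vice versa); a duplicated
         communicator gives MUMPS a private tag space.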
*/
1789       MPI_Comm comm;
1790       PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
1791       mumps->mumps_comm = comm;
1792     } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
1793   }
1794
1795   mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1796   mumps->id.job = JOB_INIT;
1797   mumps->id.par = 1; /* host participates in factorization and solve */
1798   mumps->id.sym = mumps->sym;
1799
1800   size = mumps->id.size_schur;
1801   arr = mumps->id.schur;
1802   listvar_schur = mumps->id.listvar_schur;
1803   PetscMUMPS_c(mumps);
1804   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1805   /* restore cached ICNTL and CNTL values */
1806   for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
1807   for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
1808   PetscCall(PetscFree(mumps->ICNTL_pre));
1809   PetscCall(PetscFree(mumps->CNTL_pre));
1810
1811   if (schur) {
1812     mumps->id.size_schur = size;
1813     mumps->id.schur_lld = size;
1814     mumps->id.schur = arr;
1815     mumps->id.listvar_schur = listvar_schur;
1816     if (mumps->petsc_size > 1) {
1817       PetscBool gs; /* gs is false if any rank other than root has a non-empty IS */
1818
1819       mumps->id.ICNTL(19) = 1; /* MUMPS returns the Schur complement centralized on the host */
1820       gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1821       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
1822       PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
1823     } else {
1824       if (F->factortype == MAT_FACTOR_LU) {
1825         mumps->id.ICNTL(19) = 3; /* MUMPS returns the full matrix */
1826       } else {
1827         mumps->id.ICNTL(19) = 2; /* MUMPS returns the lower triangular part */
1828       }
1829     }
1830     mumps->id.ICNTL(26) = -1;
1831   }
1832
1833   /* Copy the MUMPS default control values from the master to the slaves. Although the slaves do not call MUMPS, they may access these values in code.
1834      For example, ICNTL(9) is initialized to 1 by MUMPS and the slaves check ICNTL(9) in MatSolve_MUMPS.
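     MatMumpsGetIcntl()/MatMumpsGetCntl() likewise read id.icntl/id.cntl locally on the calling rank
     once MUMPS has been initialized, so they also rely on this broadcast.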
1835   */
1836   PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
1837   PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
1838
1839   mumps->scat_rhs = NULL;
1840   mumps->scat_sol = NULL;
1841
1842   /* set PETSc-MUMPS default options - override MUMPS defaults */
1843   mumps->id.ICNTL(3) = 0;
1844   mumps->id.ICNTL(4) = 0;
1845   if (mumps->petsc_size == 1) {
1846     mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1847     mumps->id.ICNTL(7) = 7;  /* automatic choice of ordering done by the package */
1848   } else {
1849     mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1850     mumps->id.ICNTL(21) = 1; /* distributed solution */
1851   }
1852   }
1853   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
1854   if (flg) mumps->id.ICNTL(1) = icntl;
1855   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warnings", "None", mumps->id.ICNTL(2), &icntl, &flg));
1856   if (flg) mumps->id.ICNTL(2) = icntl;
1857   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
1858   if (flg) mumps->id.ICNTL(3) = icntl;
1859
1860   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
1861   if (flg) mumps->id.ICNTL(4) = icntl;
1862   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
1863
1864   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
1865   if (flg) mumps->id.ICNTL(6) = icntl;
1866
1867   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis.
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 1868 if (flg) { 1869 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1870 mumps->id.ICNTL(7) = icntl; 1871 } 1872 1873 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 1874 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 1875 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 1876 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 1877 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 1878 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 1879 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 1880 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 1881 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 1882 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 1883 if (flg) { 1884 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 1885 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 1886 } 1887 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 1888 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1889 PetscCall(MatDestroy(&F->schur)); 1890 PetscCall(MatMumpsResetSchur_Private(mumps)); 1891 } 1892 1893 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1894 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1895 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1896 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1897 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1898 In short, we could not use distributed RHS with MPICH until v4.0b1. 
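     The practical consequence is only how the dense right-hand side reaches MUMPS: with
     mumps->ICNTL20 == 0 the matrix B is first gathered onto rank 0 through scat_rhs/b_seq,
     while with mumps->ICNTL20 == 10 each rank hands MUMPS its local rows of B directly.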
1899   */
1900 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1901   mumps->ICNTL20 = 0; /* Centralized dense RHS */
1902 #else
1903   mumps->ICNTL20 = 10; /* Distributed dense RHS */
1904 #endif
1905   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give MUMPS centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
1906   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
1907 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1908   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1909 #endif
1910   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vectors */
1911
1912   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
1913   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
1914   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
1915   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
1916
1917   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
1918   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix is present", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
1919   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
1920   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
1921   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
1922   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1923   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
1924   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */
1925   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None",
mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
1926   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
1927   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
1928   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
1929
1930   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
1931   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
1932   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
1933   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
1934   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
1935   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
1936
1937   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1938
1939   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
1940   if (ninfo) {
1941     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
1942     PetscCall(PetscMalloc1(ninfo, &mumps->info));
1943     mumps->ninfo = ninfo;
1944     for (i = 0; i < ninfo; i++) {
1945       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
1946       mumps->info[i] = info[i];
1947     }
1948   }
1949   PetscOptionsEnd();
1950   PetscFunctionReturn(PETSC_SUCCESS);
1951 }
1952
1953 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
1954 {
1955   PetscFunctionBegin;
1956   if (mumps->id.INFOG(1) < 0) {
1957     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
1958     if (mumps->id.INFOG(1) == -6) {
1959       PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1960       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1961     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1962       PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1963       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1964     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
1965       PetscCall(PetscInfo(F, "Empty matrix\n"));
1966     } else {
1967       PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1968       F->factorerrortype = MAT_FACTOR_OTHER;
1969     }
1970   }
1971   PetscFunctionReturn(PETSC_SUCCESS);
1972 }
1973
1974 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS
r, IS c, const MatFactorInfo *info) 1975 { 1976 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1977 Vec b; 1978 const PetscInt M = A->rmap->N; 1979 1980 PetscFunctionBegin; 1981 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1982 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1983 PetscFunctionReturn(PETSC_SUCCESS); 1984 } 1985 1986 /* Set MUMPS options from the options database */ 1987 PetscCall(MatSetFromOptions_MUMPS(F, A)); 1988 1989 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 1990 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 1991 1992 /* analysis phase */ 1993 mumps->id.job = JOB_FACTSYMBOLIC; 1994 mumps->id.n = M; 1995 switch (mumps->id.ICNTL(18)) { 1996 case 0: /* centralized assembled matrix input */ 1997 if (!mumps->myid) { 1998 mumps->id.nnz = mumps->nnz; 1999 mumps->id.irn = mumps->irn; 2000 mumps->id.jcn = mumps->jcn; 2001 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2002 if (r) { 2003 mumps->id.ICNTL(7) = 1; 2004 if (!mumps->myid) { 2005 const PetscInt *idx; 2006 PetscInt i; 2007 2008 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2009 PetscCall(ISGetIndices(r, &idx)); 2010 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2011 PetscCall(ISRestoreIndices(r, &idx)); 2012 } 2013 } 2014 } 2015 break; 2016 case 3: /* distributed assembled matrix input (size>1) */ 2017 mumps->id.nnz_loc = mumps->nnz; 2018 mumps->id.irn_loc = mumps->irn; 2019 mumps->id.jcn_loc = mumps->jcn; 2020 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2021 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2022 PetscCall(MatCreateVecs(A, NULL, &b)); 2023 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2024 PetscCall(VecDestroy(&b)); 2025 } 2026 break; 2027 } 2028 PetscMUMPS_c(mumps); 2029 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2030 2031 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2032 F->ops->solve = MatSolve_MUMPS; 2033 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2034 F->ops->matsolve = MatMatSolve_MUMPS; 2035 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2036 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2037 2038 mumps->matstruc = SAME_NONZERO_PATTERN; 2039 PetscFunctionReturn(PETSC_SUCCESS); 2040 } 2041 2042 /* Note the Petsc r and c permutations are ignored */ 2043 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2044 { 2045 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2046 Vec b; 2047 const PetscInt M = A->rmap->N; 2048 2049 PetscFunctionBegin; 2050 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2051 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2052 PetscFunctionReturn(PETSC_SUCCESS); 2053 } 2054 2055 /* Set MUMPS options from the options database */ 2056 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2057 2058 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2059 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2060 2061 /* analysis phase */ 2062 mumps->id.job = JOB_FACTSYMBOLIC; 2063 mumps->id.n = M; 2064 switch (mumps->id.ICNTL(18)) { 2065 case 0: /* centralized assembled matrix input */ 2066 if (!mumps->myid) { 2067 mumps->id.nnz = mumps->nnz; 2068 mumps->id.irn = mumps->irn; 2069 mumps->id.jcn = mumps->jcn; 2070 if (mumps->id.ICNTL(6) > 1) 
mumps->id.a = (MumpsScalar *)mumps->val; 2071 } 2072 break; 2073 case 3: /* distributed assembled matrix input (size>1) */ 2074 mumps->id.nnz_loc = mumps->nnz; 2075 mumps->id.irn_loc = mumps->irn; 2076 mumps->id.jcn_loc = mumps->jcn; 2077 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2078 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2079 PetscCall(MatCreateVecs(A, NULL, &b)); 2080 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2081 PetscCall(VecDestroy(&b)); 2082 } 2083 break; 2084 } 2085 PetscMUMPS_c(mumps); 2086 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2087 2088 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2089 F->ops->solve = MatSolve_MUMPS; 2090 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2091 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2092 2093 mumps->matstruc = SAME_NONZERO_PATTERN; 2094 PetscFunctionReturn(PETSC_SUCCESS); 2095 } 2096 2097 /* Note the Petsc r permutation and factor info are ignored */ 2098 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2099 { 2100 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2101 Vec b; 2102 const PetscInt M = A->rmap->N; 2103 2104 PetscFunctionBegin; 2105 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2106 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2107 PetscFunctionReturn(PETSC_SUCCESS); 2108 } 2109 2110 /* Set MUMPS options from the options database */ 2111 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2112 2113 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2114 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2115 2116 /* analysis phase */ 2117 mumps->id.job = JOB_FACTSYMBOLIC; 2118 mumps->id.n = M; 2119 switch (mumps->id.ICNTL(18)) { 2120 case 0: /* centralized assembled matrix input */ 2121 if (!mumps->myid) { 2122 mumps->id.nnz = mumps->nnz; 2123 mumps->id.irn = mumps->irn; 2124 mumps->id.jcn = mumps->jcn; 2125 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2126 } 2127 break; 2128 case 3: /* distributed assembled matrix input (size>1) */ 2129 mumps->id.nnz_loc = mumps->nnz; 2130 mumps->id.irn_loc = mumps->irn; 2131 mumps->id.jcn_loc = mumps->jcn; 2132 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2133 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2134 PetscCall(MatCreateVecs(A, NULL, &b)); 2135 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2136 PetscCall(VecDestroy(&b)); 2137 } 2138 break; 2139 } 2140 PetscMUMPS_c(mumps); 2141 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2142 2143 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2144 F->ops->solve = MatSolve_MUMPS; 2145 F->ops->solvetranspose = MatSolve_MUMPS; 2146 F->ops->matsolve = MatMatSolve_MUMPS; 2147 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2148 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2149 #if defined(PETSC_USE_COMPLEX) 2150 F->ops->getinertia = NULL; 2151 #else 2152 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2153 #endif 2154 2155 mumps->matstruc = SAME_NONZERO_PATTERN; 2156 PetscFunctionReturn(PETSC_SUCCESS); 2157 } 2158 2159 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2160 { 2161 PetscBool iascii; 2162 PetscViewerFormat format; 2163 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2164 2165 PetscFunctionBegin; 2166 /* check if matrix is mumps type */ 2167 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2168 2169 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2170 if (iascii) { 2171 PetscCall(PetscViewerGetFormat(viewer, &format)); 2172 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2173 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2174 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2175 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2176 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2177 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2178 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2179 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2180 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2181 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2182 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2183 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2184 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2185 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2186 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2187 if (mumps->id.ICNTL(11) > 0) { 2188 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2189 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2190 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2191 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2192 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2193 PetscCall(PetscViewerASCIIPrintf(viewer, " 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2194         }
2195         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12)));
2196         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13)));
2197         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2198         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15)));
2199         /* ICNTL(16-17) not used */
2200         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18)));
2201         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19)));
2202         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20)));
2203         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21)));
2204         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22)));
2205         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory that can be allocated locally): %d\n", mumps->id.ICNTL(23)));
2206
2207         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24)));
2208         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25)));
2209         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26)));
2210         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27)));
2211         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28)));
2212         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29)));
2213
2214         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30)));
2215         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors discarded in the solve phase): %d\n", mumps->id.ICNTL(31)));
2216         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33)));
2217         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35)));
2218         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36)));
2219         PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38)));
2220
2221         PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1)));
2222         PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
2223         PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3)));
2224         PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4)));
2225         PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5)));
2226         PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7)));
2227
2228         /* information local to
each processor */
2229       PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2230       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2231       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
2232       PetscCall(PetscViewerFlush(viewer));
2233       PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2234       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
2235       PetscCall(PetscViewerFlush(viewer));
2236       PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2237       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
2238       PetscCall(PetscViewerFlush(viewer));
2239
2240       PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n"));
2241       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2242       PetscCall(PetscViewerFlush(viewer));
2243
2244       PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n"));
2245       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2246       PetscCall(PetscViewerFlush(viewer));
2247
2248       PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2249       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2250       PetscCall(PetscViewerFlush(viewer));
2251
2252       if (mumps->ninfo && mumps->ninfo <= 80) {
2253         PetscInt i;
2254         for (i = 0; i < mumps->ninfo; i++) {
2255           PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2256           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2257           PetscCall(PetscViewerFlush(viewer));
2258         }
2259       }
2260       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2261     } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ?
((PetscObject)A)->prefix : ""));
2262
2263     if (mumps->myid == 0) { /* information from the host */
2264       PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
2265       PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
2266       PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
2267       PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2268
2269       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2270       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2271       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2272       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2273       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2274       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2275       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2276       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2277       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2278       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2279       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2280       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compressions after factorization): %d\n", mumps->id.INFOG(14)));
2281       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2282       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2283       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2284       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2285       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2286       PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2287 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2288 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2289 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2290 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2291 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2292 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2293 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2294 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2295 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2296 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2297 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2298 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2299 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2300 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2301 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2302 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2303 } 2304 } 2305 } 2306 PetscFunctionReturn(PETSC_SUCCESS); 2307 } 2308 2309 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2310 { 2311 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2312 2313 PetscFunctionBegin; 2314 info->block_size = 1.0; 2315 info->nz_allocated = mumps->id.INFOG(20); 2316 info->nz_used = mumps->id.INFOG(20); 2317 info->nz_unneeded = 0.0; 2318 info->assemblies = 0.0; 2319 info->mallocs = 0.0; 2320 info->memory = 0.0; 2321 info->fill_ratio_given = 0; 2322 info->fill_ratio_needed = 0; 2323 info->factor_mallocs = 0; 2324 PetscFunctionReturn(PETSC_SUCCESS); 2325 } 2326 2327 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2328 { 2329 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2330 const PetscScalar *arr; 2331 const PetscInt *idxs; 2332 
PetscInt size, i;
2333
2334   PetscFunctionBegin;
2335   PetscCall(ISGetLocalSize(is, &size));
2336   /* Schur complement matrix */
2337   PetscCall(MatDestroy(&F->schur));
2338   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2339   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2340   mumps->id.schur = (MumpsScalar *)arr;
2341   mumps->id.size_schur = size;
2342   mumps->id.schur_lld = size;
2343   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2344   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2345
2346   /* MUMPS expects Fortran style indices */
2347   PetscCall(PetscFree(mumps->id.listvar_schur));
2348   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2349   PetscCall(ISGetIndices(is, &idxs));
2350   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
2351   PetscCall(ISRestoreIndices(is, &idxs));
2352   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2353   mumps->id.ICNTL(26) = -1;
2354   PetscFunctionReturn(PETSC_SUCCESS);
2355 }
2356
2357 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2358 {
2359   Mat St;
2360   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2361   PetscScalar *array;
2362 #if defined(PETSC_USE_COMPLEX)
2363   PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2364 #endif
2365
2366   PetscFunctionBegin;
2367   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2368   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2369   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2370   PetscCall(MatSetType(St, MATDENSE));
2371   PetscCall(MatSetUp(St));
2372   PetscCall(MatDenseGetArray(St, &array));
2373   if (!mumps->sym) { /* MUMPS always returns a full matrix */
2374     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2375       PetscInt i, j, N = mumps->id.size_schur;
2376       for (i = 0; i < N; i++) {
2377         for (j = 0; j < N; j++) {
2378 #if !defined(PETSC_USE_COMPLEX)
2379           PetscScalar val = mumps->id.schur[i * N + j];
2380 #else
2381           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2382 #endif
2383           array[j * N + i] = val;
2384         }
2385       }
2386     } else { /* stored by columns */
2387       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2388     }
2389   } else { /* either full or lower-triangular (not packed) */
2390     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2391       PetscInt i, j, N = mumps->id.size_schur;
2392       for (i = 0; i < N; i++) {
2393         for (j = i; j < N; j++) {
2394 #if !defined(PETSC_USE_COMPLEX)
2395           PetscScalar val = mumps->id.schur[i * N + j];
2396 #else
2397           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2398 #endif
2399           array[i * N + j] = val;
2400           array[j * N + i] = val;
2401         }
2402       }
2403     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2404       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2405     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2406       PetscInt i, j, N = mumps->id.size_schur;
2407       for (i = 0; i < N; i++) {
2408         for (j = 0; j < i + 1; j++) {
2409 #if !defined(PETSC_USE_COMPLEX)
2410           PetscScalar val = mumps->id.schur[i * N + j];
2411 #else
2412           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2413 #endif
2414           array[i * N + j] = val;
2415 array[j * N + i] = val; 2416 } 2417 } 2418 } 2419 } 2420 PetscCall(MatDenseRestoreArray(St, &array)); 2421 *S = St; 2422 PetscFunctionReturn(PETSC_SUCCESS); 2423 } 2424 2425 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2426 { 2427 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2428 2429 PetscFunctionBegin; 2430 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2431 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2432 for (i = 0; i < nICNTL_pre; ++i) 2433 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2434 if (i == nICNTL_pre) { /* not already cached */ 2435 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2436 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2437 mumps->ICNTL_pre[0]++; 2438 } 2439 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2440 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2441 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2442 PetscFunctionReturn(PETSC_SUCCESS); 2443 } 2444 2445 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2446 { 2447 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2448 2449 PetscFunctionBegin; 2450 if (mumps->id.job == JOB_NULL) { 2451 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2452 *ival = 0; 2453 for (i = 0; i < nICNTL_pre; ++i) { 2454 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2455 } 2456 } else *ival = mumps->id.ICNTL(icntl); 2457 PetscFunctionReturn(PETSC_SUCCESS); 2458 } 2459 2460 /*@ 2461 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2462 2463 Logically Collective 2464 2465 Input Parameters: 2466 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2467 . icntl - index of MUMPS parameter array ICNTL() 2468 - ival - value of MUMPS ICNTL(icntl) 2469 2470 Options Database Key: 2471 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2472 2473 Level: beginner 2474 2475 References: 2476 . * - MUMPS Users' Guide 2477 2478 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2479 @*/ 2480 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2481 { 2482 PetscFunctionBegin; 2483 PetscValidType(F, 1); 2484 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2485 PetscValidLogicalCollectiveInt(F, icntl, 2); 2486 PetscValidLogicalCollectiveInt(F, ival, 3); 2487 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2488 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2489 PetscFunctionReturn(PETSC_SUCCESS); 2490 } 2491 2492 /*@ 2493 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2494 2495 Logically Collective 2496 2497 Input Parameters: 2498 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2499 - icntl - index of MUMPS parameter array ICNTL() 2500 2501 Output Parameter: 2502 . ival - value of MUMPS ICNTL(icntl) 2503 2504 Level: beginner 2505 2506 References: 2507 . 
* - MUMPS Users' Guide 2508 2509 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2510 @*/ 2511 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2512 { 2513 PetscFunctionBegin; 2514 PetscValidType(F, 1); 2515 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2516 PetscValidLogicalCollectiveInt(F, icntl, 2); 2517 PetscValidIntPointer(ival, 3); 2518 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2519 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2520 PetscFunctionReturn(PETSC_SUCCESS); 2521 } 2522 2523 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2524 { 2525 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2526 2527 PetscFunctionBegin; 2528 if (mumps->id.job == JOB_NULL) { 2529 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2530 for (i = 0; i < nCNTL_pre; ++i) 2531 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2532 if (i == nCNTL_pre) { 2533 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2534 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2535 mumps->CNTL_pre[0]++; 2536 } 2537 mumps->CNTL_pre[1 + 2 * i] = icntl; 2538 mumps->CNTL_pre[2 + 2 * i] = val; 2539 } else mumps->id.CNTL(icntl) = val; 2540 PetscFunctionReturn(PETSC_SUCCESS); 2541 } 2542 2543 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2544 { 2545 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2546 2547 PetscFunctionBegin; 2548 if (mumps->id.job == JOB_NULL) { 2549 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2550 *val = 0.0; 2551 for (i = 0; i < nCNTL_pre; ++i) { 2552 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2553 } 2554 } else *val = mumps->id.CNTL(icntl); 2555 PetscFunctionReturn(PETSC_SUCCESS); 2556 } 2557 2558 /*@ 2559 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2560 2561 Logically Collective 2562 2563 Input Parameters: 2564 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2565 . icntl - index of MUMPS parameter array CNTL() 2566 - val - value of MUMPS CNTL(icntl) 2567 2568 Options Database Key: 2569 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val 2570 2571 Level: beginner 2572 2573 References: 2574 .
* - MUMPS Users' Guide 2575 2576 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2577 @*/ 2578 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2579 { 2580 PetscFunctionBegin; 2581 PetscValidType(F, 1); 2582 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2583 PetscValidLogicalCollectiveInt(F, icntl, 2); 2584 PetscValidLogicalCollectiveReal(F, val, 3); 2585 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2586 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2587 PetscFunctionReturn(PETSC_SUCCESS); 2588 } 2589 2590 /*@ 2591 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2592 2593 Logically Collective 2594 2595 Input Parameters: 2596 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2597 - icntl - index of MUMPS parameter array CNTL() 2598 2599 Output Parameter: 2600 . val - value of MUMPS CNTL(icntl) 2601 2602 Level: beginner 2603 2604 References: 2605 . * - MUMPS Users' Guide 2606 2607 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2608 @*/ 2609 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2610 { 2611 PetscFunctionBegin; 2612 PetscValidType(F, 1); 2613 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2614 PetscValidLogicalCollectiveInt(F, icntl, 2); 2615 PetscValidRealPointer(val, 3); 2616 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2617 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2618 PetscFunctionReturn(PETSC_SUCCESS); 2619 } 2620 2621 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2622 { 2623 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2624 2625 PetscFunctionBegin; 2626 *info = mumps->id.INFO(icntl); 2627 PetscFunctionReturn(PETSC_SUCCESS); 2628 } 2629 2630 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2631 { 2632 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2633 2634 PetscFunctionBegin; 2635 *infog = mumps->id.INFOG(icntl); 2636 PetscFunctionReturn(PETSC_SUCCESS); 2637 } 2638 2639 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2640 { 2641 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2642 2643 PetscFunctionBegin; 2644 *rinfo = mumps->id.RINFO(icntl); 2645 PetscFunctionReturn(PETSC_SUCCESS); 2646 } 2647 2648 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2649 { 2650 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2651 2652 PetscFunctionBegin; 2653 *rinfog = mumps->id.RINFOG(icntl); 2654 PetscFunctionReturn(PETSC_SUCCESS); 2655 } 2656 2657 PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2658 { 2659 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2660 2661 PetscFunctionBegin; 2662 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection"); 2663 *size = 0; 2664 *array = NULL; 2665 if
(!mumps->myid) { 2666 *size = mumps->id.INFOG(28); 2667 PetscCall(PetscMalloc1(*size, array)); 2668 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2669 } 2670 PetscFunctionReturn(PETSC_SUCCESS); 2671 } 2672 2673 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2674 { 2675 Mat Bt = NULL, Btseq = NULL; 2676 PetscBool flg; 2677 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2678 PetscScalar *aa; 2679 PetscInt spnr, *ia, *ja, M, nrhs; 2680 2681 PetscFunctionBegin; 2682 PetscValidPointer(spRHS, 2); 2683 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2684 if (flg) { 2685 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2686 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL"); 2687 2688 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2689 2690 if (mumps->petsc_size > 1) { 2691 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2692 Btseq = b->A; 2693 } else { 2694 Btseq = Bt; 2695 } 2696 2697 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2698 mumps->id.nrhs = nrhs; 2699 mumps->id.lrhs = M; 2700 mumps->id.rhs = NULL; 2701 2702 if (!mumps->myid) { 2703 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2704 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2705 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2706 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2707 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2708 } else { 2709 mumps->id.irhs_ptr = NULL; 2710 mumps->id.irhs_sparse = NULL; 2711 mumps->id.nz_rhs = 0; 2712 mumps->id.rhs_sparse = NULL; 2713 } 2714 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2715 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2716 2717 /* solve phase */ 2718 mumps->id.job = JOB_SOLVE; 2719 PetscMUMPS_c(mumps); 2720 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 2721 2722 if (!mumps->myid) { 2723 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2724 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2725 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 2726 } 2727 PetscFunctionReturn(PETSC_SUCCESS); 2728 } 2729 2730 /*@ 2731 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2732 2733 Logically Collective 2734 2735 Input Parameter: 2736 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2737 2738 Output Parameter: 2739 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2740 2741 Level: beginner 2742 2743 References: 2744 .
* - MUMPS Users' Guide 2745 2746 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2747 @*/ 2748 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2749 { 2750 PetscFunctionBegin; 2751 PetscValidType(F, 1); 2752 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2753 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2754 PetscFunctionReturn(PETSC_SUCCESS); 2755 } 2756 2757 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2758 { 2759 Mat spRHS; 2760 2761 PetscFunctionBegin; 2762 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 2763 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 2764 PetscCall(MatDestroy(&spRHS)); 2765 PetscFunctionReturn(PETSC_SUCCESS); 2766 } 2767 2768 /*@ 2769 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 2770 2771 Logically Collective 2772 2773 Input Parameter: 2774 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2775 2776 Output Parameter: 2777 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 2778 2779 Level: beginner 2780 2781 References: 2782 . * - MUMPS Users' Guide 2783 2784 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2785 @*/ 2786 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 2787 { 2788 PetscBool flg; 2789 2790 PetscFunctionBegin; 2791 PetscValidType(F, 1); 2792 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2793 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 2794 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 2795 2796 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 2797 PetscFunctionReturn(PETSC_SUCCESS); 2798 } 2799 2800 /*@ 2801 MatMumpsGetInfo - Get MUMPS parameter INFO() 2802 2803 Logically Collective 2804 2805 Input Parameters: 2806 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2807 - icntl - index of MUMPS parameter array INFO() 2808 2809 Output Parameter: 2810 . ival - value of MUMPS INFO(icntl) 2811 2812 Level: beginner 2813 2814 References: 2815 . * - MUMPS Users' Guide 2816 2817 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2818 @*/ 2819 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 2820 { 2821 PetscFunctionBegin; 2822 PetscValidType(F, 1); 2823 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2824 PetscValidIntPointer(ival, 3); 2825 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2826 PetscFunctionReturn(PETSC_SUCCESS); 2827 } 2828 2829 /*@ 2830 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2831 2832 Logically Collective 2833 2834 Input Parameters: 2835 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2836 - icntl - index of MUMPS parameter array INFOG() 2837 2838 Output Parameter: 2839 . ival - value of MUMPS INFOG(icntl) 2840 2841 Level: beginner 2842 2843 References: 2844 . 
* - MUMPS Users' Guide 2845 2846 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2847 @*/ 2848 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 2849 { 2850 PetscFunctionBegin; 2851 PetscValidType(F, 1); 2852 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2853 PetscValidIntPointer(ival, 3); 2854 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2855 PetscFunctionReturn(PETSC_SUCCESS); 2856 } 2857 2858 /*@ 2859 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2860 2861 Logically Collective 2862 2863 Input Parameters: 2864 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2865 - icntl - index of MUMPS parameter array RINFO() 2866 2867 Output Parameter: 2868 . val - value of MUMPS RINFO(icntl) 2869 2870 Level: beginner 2871 2872 References: 2873 . * - MUMPS Users' Guide 2874 2875 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 2876 @*/ 2877 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 2878 { 2879 PetscFunctionBegin; 2880 PetscValidType(F, 1); 2881 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2882 PetscValidRealPointer(val, 3); 2883 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2884 PetscFunctionReturn(PETSC_SUCCESS); 2885 } 2886 2887 /*@ 2888 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2889 2890 Logically Collective 2891 2892 Input Parameters: 2893 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2894 - icntl - index of MUMPS parameter array RINFOG() 2895 2896 Output Parameter: 2897 . val - value of MUMPS RINFOG(icntl) 2898 2899 Level: beginner 2900 2901 References: 2902 . * - MUMPS Users' Guide 2903 2904 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 2905 @*/ 2906 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 2907 { 2908 PetscFunctionBegin; 2909 PetscValidType(F, 1); 2910 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2911 PetscValidRealPointer(val, 3); 2912 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2913 PetscFunctionReturn(PETSC_SUCCESS); 2914 } 2915 2916 /*@ 2917 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 2918 2919 Logically Collective 2920 2921 Input Parameter: 2922 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2923 2924 Output Parameters: 2925 + size - local size of the array. The size of the array is non-zero only on the host. 2926 - array - array of rows with null pivots; these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 2927 for freeing this array. 2928 2929 Level: beginner 2930 2931 References: 2932 .
* - MUMPS Users' Guide 2933 2934 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 2935 @*/ 2936 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 2937 { 2938 PetscFunctionBegin; 2939 PetscValidType(F, 1); 2940 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2941 PetscValidIntPointer(size, 2); 2942 PetscValidPointer(array, 3); 2943 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 2944 PetscFunctionReturn(PETSC_SUCCESS); 2945 } 2946 2947 /*MC 2948 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2949 distributed and sequential matrices via the external package MUMPS. 2950 2951 Works with `MATAIJ` and `MATSBAIJ` matrices 2952 2953 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2954 2955 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 2956 See details below. 2957 2958 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 2959 2960 Options Database Keys: 2961 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2962 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning messages 2963 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2964 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2965 . -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales the matrix (0 to 7) 2966 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2967 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2968 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2969 . -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinements 2970 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2971 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2972 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2973 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2974 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 2975 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2976 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 2977 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2978 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor 2979 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2980 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2981 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 2982 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 2983 .
-mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2984 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2985 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2986 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2987 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2988 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2989 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2990 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2991 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2992 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2993 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2994 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2995 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2996 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2997 The default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2998 2999 Level: beginner 3000 3001 Notes: 3002 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 3003 error if the matrix is Hermitian. 3004 3005 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3006 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3007 3008 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3009 the failure with 3010 .vb 3011 KSPGetPC(ksp,&pc); 3012 PCFactorGetMatrix(pc,&mat); 3013 MatMumpsGetInfo(mat,....); 3014 MatMumpsGetInfog(mat,....); etc. 3015 .ve 3016 Or run with `-ksp_error_if_not_converged` and the program will stop with the MUMPS information printed in the error message. 3017 3018 MUMPS provides 64-bit integer support in two build modes: 3019 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3020 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3021 3022 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3023 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and 3024 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3025 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3026 3027 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3028 3029 Two modes to run MUMPS/PETSc with OpenMP 3030 .vb 3031 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores.
For example, if you want to have 16 OpenMP 3032 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3033 .ve 3034 3035 .vb 3036 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3037 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3038 .ve 3039 3040 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3041 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3042 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3043 libraries, to get good performance you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3044 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 3045 3046 If you run your code through a job submission system, there are caveats regarding MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3047 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3048 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3049 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slave ranks (as may be set 3050 by CPU binding policies in job scripts) and make them available to the master rank, so that the OpenMP threads spawned by MUMPS can run on them. 3051 On multi-socket compute nodes, MPI rank mapping matters. Continuing the above example, suppose a compute node has two sockets. 3052 If MPI ranks are interleaved across the two sockets, that is, even ranks are placed on socket 0 and odd ranks on socket 1, and ranks are bound 3053 to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the 3054 cores in socket 1, which hurts data locality. On the other hand, if MPI ranks are mapped consecutively onto the two sockets, the 3055 problem does not arise. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3056 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3057 examine the mapping result. 3058 3059 PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3060 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3061 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3062 3063 References: 3064 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3065 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
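   Example Usage:
   A minimal sketch (assuming `ksp`, `pc`, `F`, `b`, `x`, and `ival` are declared and created elsewhere; not a complete program) of selecting this
   solver for a `KSP` solve from the options database and querying MUMPS data afterwards
.vb
   KSPSetFromOptions(ksp);        /* run with -pc_type lu -pc_factor_mat_solver_type mumps */
   KSPSolve(ksp, b, x);
   KSPGetPC(ksp, &pc);
   PCFactorGetMatrix(pc, &F);
   MatMumpsGetInfog(F, 1, &ival); /* INFOG(1) < 0 indicates an error in the latest MUMPS phase */
.ve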
3066 3067 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3068 M*/ 3069 3070 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3071 { 3072 PetscFunctionBegin; 3073 *type = MATSOLVERMUMPS; 3074 PetscFunctionReturn(PETSC_SUCCESS); 3075 } 3076 3077 /* MatGetFactor for Seq and MPI AIJ matrices */ 3078 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3079 { 3080 Mat B; 3081 Mat_MUMPS *mumps; 3082 PetscBool isSeqAIJ; 3083 PetscMPIInt size; 3084 3085 PetscFunctionBegin; 3086 #if defined(PETSC_USE_COMPLEX) 3087 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3088 #endif 3089 /* Create the factorization matrix */ 3090 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3091 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3092 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3093 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3094 PetscCall(MatSetUp(B)); 3095 3096 PetscCall(PetscNew(&mumps)); 3097 3098 B->ops->view = MatView_MUMPS; 3099 B->ops->getinfo = MatGetInfo_MUMPS; 3100 3101 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3102 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3103 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3104 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3105 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3106 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3107 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3108 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3109 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3110 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3111 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3112 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3113 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3114 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3115 3116 if (ftype == MAT_FACTOR_LU) { 3117 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3118 B->factortype = MAT_FACTOR_LU; 3119 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3120 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3121 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3122 mumps->sym = 0; 3123 } else { 3124 B->ops->choleskyfactorsymbolic = 
MatCholeskyFactorSymbolic_MUMPS; 3125 B->factortype = MAT_FACTOR_CHOLESKY; 3126 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3127 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3128 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3129 #if defined(PETSC_USE_COMPLEX) 3130 mumps->sym = 2; 3131 #else 3132 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3133 else mumps->sym = 2; 3134 #endif 3135 } 3136 3137 /* set solvertype */ 3138 PetscCall(PetscFree(B->solvertype)); 3139 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3140 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3141 if (size == 1) { 3142 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3143 B->canuseordering = PETSC_TRUE; 3144 } 3145 B->ops->destroy = MatDestroy_MUMPS; 3146 B->data = (void *)mumps; 3147 3148 *F = B; 3149 mumps->id.job = JOB_NULL; 3150 mumps->ICNTL_pre = NULL; 3151 mumps->CNTL_pre = NULL; 3152 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3153 PetscFunctionReturn(PETSC_SUCCESS); 3154 } 3155 3156 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3157 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3158 { 3159 Mat B; 3160 Mat_MUMPS *mumps; 3161 PetscBool isSeqSBAIJ; 3162 PetscMPIInt size; 3163 3164 PetscFunctionBegin; 3165 #if defined(PETSC_USE_COMPLEX) 3166 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3167 #endif 3168 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3169 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3170 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3171 PetscCall(MatSetUp(B)); 3172 3173 PetscCall(PetscNew(&mumps)); 3174 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3175 if (isSeqSBAIJ) { 3176 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3177 } else { 3178 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3179 } 3180 3181 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3182 B->ops->view = MatView_MUMPS; 3183 B->ops->getinfo = MatGetInfo_MUMPS; 3184 3185 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3186 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3187 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3188 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3189 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3190 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3191 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3192 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3193 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3195 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", 
MatMumpsGetRinfog_MUMPS)); 3196 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3197 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3198 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3199 3200 B->factortype = MAT_FACTOR_CHOLESKY; 3201 #if defined(PETSC_USE_COMPLEX) 3202 mumps->sym = 2; 3203 #else 3204 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3205 else mumps->sym = 2; 3206 #endif 3207 3208 /* set solvertype */ 3209 PetscCall(PetscFree(B->solvertype)); 3210 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3211 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3212 if (size == 1) { 3213 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3214 B->canuseordering = PETSC_TRUE; 3215 } 3216 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3217 B->ops->destroy = MatDestroy_MUMPS; 3218 B->data = (void *)mumps; 3219 3220 *F = B; 3221 mumps->id.job = JOB_NULL; 3222 mumps->ICNTL_pre = NULL; 3223 mumps->CNTL_pre = NULL; 3224 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3225 PetscFunctionReturn(PETSC_SUCCESS); 3226 } 3227 3228 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3229 { 3230 Mat B; 3231 Mat_MUMPS *mumps; 3232 PetscBool isSeqBAIJ; 3233 PetscMPIInt size; 3234 3235 PetscFunctionBegin; 3236 /* Create the factorization matrix */ 3237 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3238 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3239 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3240 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3241 PetscCall(MatSetUp(B)); 3242 3243 PetscCall(PetscNew(&mumps)); 3244 if (ftype == MAT_FACTOR_LU) { 3245 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3246 B->factortype = MAT_FACTOR_LU; 3247 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3248 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3249 mumps->sym = 0; 3250 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3251 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3252 3253 B->ops->view = MatView_MUMPS; 3254 B->ops->getinfo = MatGetInfo_MUMPS; 3255 3256 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3257 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3258 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3259 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3260 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3261 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3262 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3263 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3264 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3265 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3266 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3267 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3268 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3269 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3270 3271 /* set solvertype */ 3272 PetscCall(PetscFree(B->solvertype)); 3273 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3274 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3275 if (size == 1) { 3276 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3277 B->canuseordering = PETSC_TRUE; 3278 } 3279 B->ops->destroy = MatDestroy_MUMPS; 3280 B->data = (void *)mumps; 3281 3282 *F = B; 3283 mumps->id.job = JOB_NULL; 3284 mumps->ICNTL_pre = NULL; 3285 mumps->CNTL_pre = NULL; 3286 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3287 PetscFunctionReturn(PETSC_SUCCESS); 3288 } 3289 3290 /* MatGetFactor for Seq and MPI SELL matrices */ 3291 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3292 { 3293 Mat B; 3294 Mat_MUMPS *mumps; 3295 PetscBool isSeqSELL; 3296 PetscMPIInt size; 3297 3298 PetscFunctionBegin; 3299 /* Create the factorization matrix */ 3300 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3301 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3302 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3303 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3304 PetscCall(MatSetUp(B)); 3305 3306 PetscCall(PetscNew(&mumps)); 3307 3308 B->ops->view = MatView_MUMPS; 3309 B->ops->getinfo = MatGetInfo_MUMPS; 3310 3311 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3312 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3313 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3314 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3315 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3316 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3317 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3318 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3319 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3320 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3321 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3322 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3323 3324 if (ftype == MAT_FACTOR_LU) { 3325 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3326 B->factortype = 
MAT_FACTOR_LU; 3327 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3328 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3329 mumps->sym = 0; 3330 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3331 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3332 3333 /* set solvertype */ 3334 PetscCall(PetscFree(B->solvertype)); 3335 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3336 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3337 if (size == 1) { 3338 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3339 B->canuseordering = PETSC_TRUE; 3340 } 3341 B->ops->destroy = MatDestroy_MUMPS; 3342 B->data = (void *)mumps; 3343 3344 *F = B; 3345 mumps->id.job = JOB_NULL; 3346 mumps->ICNTL_pre = NULL; 3347 mumps->CNTL_pre = NULL; 3348 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3349 PetscFunctionReturn(PETSC_SUCCESS); 3350 } 3351 3352 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3353 { 3354 PetscFunctionBegin; 3355 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3356 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3357 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3358 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3359 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3360 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3361 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3362 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3363 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3364 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3365 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3366 PetscFunctionReturn(PETSC_SUCCESS); 3367 } 3368
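/*
   Example usage (an illustrative sketch only; `A` is an AIJ matrix and `is` an index set, both created by the caller):
   factor A with MUMPS through the factor interface, without a KSP, and extract the Schur complement on the indices in `is`.

     Mat           F, S;
     IS            row, col;
     MatFactorInfo info;

     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatFactorSetSchurIS(F, is));                  // must be set before the symbolic factorization
     PetscCall(MatGetOrdering(A, MATORDERINGEXTERNAL, &row, &col));
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatLUFactorSymbolic(F, A, row, col, &info));
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); // S is a sequential dense matrix
     PetscCall(ISDestroy(&row));
     PetscCall(ISDestroy(&col));
*/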