1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_NULL 0 27 #define JOB_FACTSYMBOLIC 1 28 #define JOB_FACTNUMERIC 2 29 #define JOB_SOLVE 3 30 #define JOB_END -2 31 32 /* calls to MUMPS */ 33 #if defined(PETSC_USE_COMPLEX) 34 #if defined(PETSC_USE_REAL_SINGLE) 35 #define MUMPS_c cmumps_c 36 #else 37 #define MUMPS_c zmumps_c 38 #endif 39 #else 40 #if defined(PETSC_USE_REAL_SINGLE) 41 #define MUMPS_c smumps_c 42 #else 43 #define MUMPS_c dmumps_c 44 #endif 45 #endif 46 47 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 48 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 49 naming convention in PetscMPIInt, PetscBLASInt etc. 50 */ 51 typedef MUMPS_INT PetscMUMPSInt; 52 53 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 54 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 55 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 56 #endif 57 #else 58 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 59 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 60 #endif 61 #endif 62 63 #define MPIU_MUMPSINT MPI_INT 64 #define PETSC_MUMPS_INT_MAX 2147483647 65 #define PETSC_MUMPS_INT_MIN -2147483648 66 67 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 68 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 69 { 70 PetscFunctionBegin; 71 #if PetscDefined(USE_64BIT_INDICES) 72 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 73 #endif 74 *b = (PetscMUMPSInt)(a); 75 PetscFunctionReturn(PETSC_SUCCESS); 76 } 77 78 /* Put these utility routines here since they are only used in this file */ 79 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 80 { 81 PetscInt myval; 82 PetscBool myset; 83 PetscFunctionBegin; 84 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 85 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 86 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 87 if (set) *set = myset; 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 91 92 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. 
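Non-master ranks wait in PetscOmpCtrlBarrier() and release their cores, so the master's MUMPS call can use those cores through OpenMP threads.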
Before/after the call, we change/restore CPUs the master ranks can run on */ 93 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 94 #define PetscMUMPS_c(mumps) \ 95 do { \ 96 if (mumps->use_petsc_omp_support) { \ 97 if (mumps->is_omp_master) { \ 98 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 99 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 100 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 101 PetscCall(PetscFPTrapPop()); \ 102 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 103 } \ 104 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 105 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 106 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 107 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 108 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 109 */ \ 110 PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 111 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \ 112 PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 113 } else { \ 114 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 115 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 116 PetscCall(PetscFPTrapPop()); \ 117 } \ 118 } while (0) 119 #else 120 #define PetscMUMPS_c(mumps) \ 121 do { \ 122 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 123 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 124 PetscCall(PetscFPTrapPop()); \ 125 } while (0) 126 #endif 127 128 /* declare MumpsScalar */ 129 #if defined(PETSC_USE_COMPLEX) 130 #if defined(PETSC_USE_REAL_SINGLE) 131 #define MumpsScalar mumps_complex 132 #else 133 #define MumpsScalar mumps_double_complex 134 #endif 135 #else 136 #define MumpsScalar PetscScalar 137 #endif 138 139 /* macros s.t. indices match MUMPS documentation */ 140 #define ICNTL(I) icntl[(I)-1] 141 #define CNTL(I) cntl[(I)-1] 142 #define INFOG(I) infog[(I)-1] 143 #define INFO(I) info[(I)-1] 144 #define RINFOG(I) rinfog[(I)-1] 145 #define RINFO(I) rinfo[(I)-1] 146 147 typedef struct Mat_MUMPS Mat_MUMPS; 148 struct Mat_MUMPS { 149 #if defined(PETSC_USE_COMPLEX) 150 #if defined(PETSC_USE_REAL_SINGLE) 151 CMUMPS_STRUC_C id; 152 #else 153 ZMUMPS_STRUC_C id; 154 #endif 155 #else 156 #if defined(PETSC_USE_REAL_SINGLE) 157 SMUMPS_STRUC_C id; 158 #else 159 DMUMPS_STRUC_C id; 160 #endif 161 #endif 162 163 MatStructure matstruc; 164 PetscMPIInt myid, petsc_size; 165 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 166 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 167 PetscInt64 nnz; /* number of nonzeros. 
The type is called selective 64-bit in mumps */ 168 PetscMUMPSInt sym; 169 MPI_Comm mumps_comm; 170 PetscMUMPSInt *ICNTL_pre; 171 PetscReal *CNTL_pre; 172 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 173 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 174 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 175 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 176 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 177 PetscInt *rhs_nrow, max_nrhs; 178 PetscMPIInt *rhs_recvcounts, *rhs_disps; 179 PetscScalar *rhs_loc, *rhs_recvbuf; 180 #endif 181 Vec b_seq, x_seq; 182 PetscInt ninfo, *info; /* which INFO to display */ 183 PetscInt sizeredrhs; 184 PetscScalar *schur_sol; 185 PetscInt schur_sizesol; 186 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 187 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 188 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 189 190 /* stuff used by petsc/mumps OpenMP support*/ 191 PetscBool use_petsc_omp_support; 192 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 193 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 194 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 195 PetscMPIInt tag, omp_comm_size; 196 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 197 MPI_Request *reqs; 198 }; 199 200 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 201 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 202 */ 203 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 204 { 205 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 206 207 PetscFunctionBegin; 208 #if defined(PETSC_USE_64BIT_INDICES) 209 { 210 PetscInt i; 211 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 212 PetscCall(PetscFree(mumps->ia_alloc)); 213 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 214 mumps->cur_ilen = nrow + 1; 215 } 216 if (nnz > mumps->cur_jlen) { 217 PetscCall(PetscFree(mumps->ja_alloc)); 218 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 219 mumps->cur_jlen = nnz; 220 } 221 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 222 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 223 *ia_mumps = mumps->ia_alloc; 224 *ja_mumps = mumps->ja_alloc; 225 } 226 #else 227 *ia_mumps = ia; 228 *ja_mumps = ja; 229 #endif 230 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 231 PetscFunctionReturn(PETSC_SUCCESS); 232 } 233 234 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 235 { 236 PetscFunctionBegin; 237 PetscCall(PetscFree(mumps->id.listvar_schur)); 238 PetscCall(PetscFree(mumps->id.redrhs)); 239 PetscCall(PetscFree(mumps->schur_sol)); 240 mumps->id.size_schur = 0; 241 mumps->id.schur_lld = 0; 242 mumps->id.ICNTL(19) = 0; 243 PetscFunctionReturn(PETSC_SUCCESS); 244 } 245 246 /* solve with rhs in mumps->id.redrhs and return in the same location */ 247 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 248 { 249 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 250 Mat S, B, X; 251 MatFactorSchurStatus schurstatus; 252 PetscInt sizesol; 253 254 PetscFunctionBegin; 255 PetscCall(MatFactorFactorizeSchurComplement(F)); 256 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 257 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 258 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 259 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 260 PetscCall(MatBindToCPU(B, S->boundtocpu)); 261 #endif 262 switch (schurstatus) { 263 case MAT_FACTOR_SCHUR_FACTORED: 264 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 265 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 266 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 267 PetscCall(MatBindToCPU(X, S->boundtocpu)); 268 #endif 269 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 270 PetscCall(MatMatSolveTranspose(S, B, X)); 271 } else { 272 PetscCall(MatMatSolve(S, B, X)); 273 } 274 break; 275 case MAT_FACTOR_SCHUR_INVERTED: 276 sizesol = mumps->id.nrhs * mumps->id.size_schur; 277 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 278 PetscCall(PetscFree(mumps->schur_sol)); 279 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 280 mumps->schur_sizesol = sizesol; 281 } 282 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 283 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 284 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 285 PetscCall(MatBindToCPU(X, S->boundtocpu)); 286 #endif 287 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 288 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 289 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 290 } else { 291 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 292 } 293 PetscCall(MatProductSetFromOptions(X)); 294 PetscCall(MatProductSymbolic(X)); 295 PetscCall(MatProductNumeric(X)); 296 297 
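      /* B wraps mumps->id.redrhs, so copying the computed solution X back into B returns the Schur solution in redrhs, where the expansion step (ICNTL(26)=2) expects it */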
PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 298 break; 299 default: 300 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 301 } 302 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 303 PetscCall(MatDestroy(&B)); 304 PetscCall(MatDestroy(&X)); 305 PetscFunctionReturn(PETSC_SUCCESS); 306 } 307 308 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 309 { 310 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 311 312 PetscFunctionBegin; 313 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 314 PetscFunctionReturn(PETSC_SUCCESS); 315 } 316 if (!expansion) { /* prepare for the condensation step */ 317 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 318 /* allocate MUMPS internal array to store reduced right-hand sides */ 319 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 320 PetscCall(PetscFree(mumps->id.redrhs)); 321 mumps->id.lredrhs = mumps->id.size_schur; 322 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 323 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 324 } 325 mumps->id.ICNTL(26) = 1; /* condensation phase */ 326 } else { /* prepare for the expansion step */ 327 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 328 PetscCall(MatMumpsSolveSchur_Private(F)); 329 mumps->id.ICNTL(26) = 2; /* expansion phase */ 330 PetscMUMPS_c(mumps); 331 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 332 /* restore defaults */ 333 mumps->id.ICNTL(26) = -1; 334 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 335 if (mumps->id.nrhs > 1) { 336 PetscCall(PetscFree(mumps->id.redrhs)); 337 mumps->id.lredrhs = 0; 338 mumps->sizeredrhs = 0; 339 } 340 } 341 PetscFunctionReturn(PETSC_SUCCESS); 342 } 343 344 /* 345 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 346 347 input: 348 A - matrix in aij,baij or sbaij format 349 shift - 0: C style output triple; 1: Fortran style output triple. 350 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 351 MAT_REUSE_MATRIX: only the values in v array are updated 352 output: 353 nnz - dim of r, c, and v (number of local nonzero entries of A) 354 r, c, v - row and col index, matrix values (matrix triples) 355 356 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 357 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 358 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
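  For example, with shift = 1 (Fortran-style indexing) the 2x2 AIJ matrix [a b; 0 c] is converted to the triples r = {1,1,2}, c = {1,2,2}, v = {a,b,c}.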
359 360 */ 361 362 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 363 { 364 const PetscScalar *av; 365 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 366 PetscInt64 nz, rnz, i, j, k; 367 PetscMUMPSInt *row, *col; 368 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 369 370 PetscFunctionBegin; 371 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 372 mumps->val = (PetscScalar *)av; 373 if (reuse == MAT_INITIAL_MATRIX) { 374 nz = aa->nz; 375 ai = aa->i; 376 aj = aa->j; 377 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 378 for (i = k = 0; i < M; i++) { 379 rnz = ai[i + 1] - ai[i]; 380 ajj = aj + ai[i]; 381 for (j = 0; j < rnz; j++) { 382 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 383 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 384 k++; 385 } 386 } 387 mumps->irn = row; 388 mumps->jcn = col; 389 mumps->nnz = nz; 390 } 391 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 392 PetscFunctionReturn(PETSC_SUCCESS); 393 } 394 395 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 396 { 397 PetscInt64 nz, i, j, k, r; 398 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 399 PetscMUMPSInt *row, *col; 400 401 PetscFunctionBegin; 402 mumps->val = a->val; 403 if (reuse == MAT_INITIAL_MATRIX) { 404 nz = a->sliidx[a->totalslices]; 405 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 406 for (i = k = 0; i < a->totalslices; i++) { 407 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 408 } 409 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 410 mumps->irn = row; 411 mumps->jcn = col; 412 mumps->nnz = nz; 413 } 414 PetscFunctionReturn(PETSC_SUCCESS); 415 } 416 417 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 418 { 419 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 420 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 421 PetscInt64 M, nz, idx = 0, rnz, i, j, k, m; 422 PetscInt bs; 423 PetscMUMPSInt *row, *col; 424 425 PetscFunctionBegin; 426 PetscCall(MatGetBlockSize(A, &bs)); 427 M = A->rmap->N / bs; 428 mumps->val = aa->a; 429 if (reuse == MAT_INITIAL_MATRIX) { 430 ai = aa->i; 431 aj = aa->j; 432 nz = bs2 * aa->nz; 433 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 434 for (i = 0; i < M; i++) { 435 ajj = aj + ai[i]; 436 rnz = ai[i + 1] - ai[i]; 437 for (k = 0; k < rnz; k++) { 438 for (j = 0; j < bs; j++) { 439 for (m = 0; m < bs; m++) { 440 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 441 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 442 idx++; 443 } 444 } 445 } 446 } 447 mumps->irn = row; 448 mumps->jcn = col; 449 mumps->nnz = nz; 450 } 451 PetscFunctionReturn(PETSC_SUCCESS); 452 } 453 454 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 455 { 456 const PetscInt *ai, *aj, *ajj; 457 PetscInt bs; 458 PetscInt64 nz, rnz, i, j, k, m; 459 PetscMUMPSInt *row, *col; 460 PetscScalar *val; 461 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 462 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 463 #if defined(PETSC_USE_COMPLEX) 464 PetscBool isset, hermitian; 465 #endif 466 467 PetscFunctionBegin; 468 #if defined(PETSC_USE_COMPLEX) 469 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 470 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 471 #endif 472 ai 
= aa->i; 473 aj = aa->j; 474 PetscCall(MatGetBlockSize(A, &bs)); 475 if (reuse == MAT_INITIAL_MATRIX) { 476 const PetscInt64 alloc_size = aa->nz * bs2; 477 478 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 479 if (bs > 1) { 480 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 481 mumps->val = mumps->val_alloc; 482 } else { 483 mumps->val = aa->a; 484 } 485 mumps->irn = row; 486 mumps->jcn = col; 487 } else { 488 if (bs == 1) mumps->val = aa->a; 489 row = mumps->irn; 490 col = mumps->jcn; 491 } 492 val = mumps->val; 493 494 nz = 0; 495 if (bs > 1) { 496 for (i = 0; i < mbs; i++) { 497 rnz = ai[i + 1] - ai[i]; 498 ajj = aj + ai[i]; 499 for (j = 0; j < rnz; j++) { 500 for (k = 0; k < bs; k++) { 501 for (m = 0; m < bs; m++) { 502 if (ajj[j] > i || k >= m) { 503 if (reuse == MAT_INITIAL_MATRIX) { 504 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 505 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 506 } 507 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 508 } 509 } 510 } 511 } 512 } 513 } else if (reuse == MAT_INITIAL_MATRIX) { 514 for (i = 0; i < mbs; i++) { 515 rnz = ai[i + 1] - ai[i]; 516 ajj = aj + ai[i]; 517 for (j = 0; j < rnz; j++) { 518 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 519 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 520 nz++; 521 } 522 } 523 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 524 } 525 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 530 { 531 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 532 PetscInt64 nz, rnz, i, j; 533 const PetscScalar *av, *v1; 534 PetscScalar *val; 535 PetscMUMPSInt *row, *col; 536 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 537 PetscBool missing; 538 #if defined(PETSC_USE_COMPLEX) 539 PetscBool hermitian, isset; 540 #endif 541 542 PetscFunctionBegin; 543 #if defined(PETSC_USE_COMPLEX) 544 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 545 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 546 #endif 547 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 548 ai = aa->i; 549 aj = aa->j; 550 adiag = aa->diag; 551 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 552 if (reuse == MAT_INITIAL_MATRIX) { 553 /* count nz in the upper triangular part of A */ 554 nz = 0; 555 if (missing) { 556 for (i = 0; i < M; i++) { 557 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 558 for (j = ai[i]; j < ai[i + 1]; j++) { 559 if (aj[j] < i) continue; 560 nz++; 561 } 562 } else { 563 nz += ai[i + 1] - adiag[i]; 564 } 565 } 566 } else { 567 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 568 } 569 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 570 PetscCall(PetscMalloc1(nz, &val)); 571 mumps->nnz = nz; 572 mumps->irn = row; 573 mumps->jcn = col; 574 mumps->val = mumps->val_alloc = val; 575 576 nz = 0; 577 if (missing) { 578 for (i = 0; i < M; i++) { 579 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 580 for (j = ai[i]; j < ai[i + 1]; j++) { 581 if (aj[j] < i) continue; 582 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 583 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 584 val[nz] = av[j]; 585 nz++; 586 } 587 } else { 588 rnz = ai[i + 1] - adiag[i]; 589 ajj = aj + adiag[i]; 590 v1 = av + adiag[i]; 591 for (j = 0; j < rnz; 
j++) { 592 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 593 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 594 val[nz++] = v1[j]; 595 } 596 } 597 } 598 } else { 599 for (i = 0; i < M; i++) { 600 rnz = ai[i + 1] - adiag[i]; 601 ajj = aj + adiag[i]; 602 v1 = av + adiag[i]; 603 for (j = 0; j < rnz; j++) { 604 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 605 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 606 val[nz++] = v1[j]; 607 } 608 } 609 } 610 } else { 611 nz = 0; 612 val = mumps->val; 613 if (missing) { 614 for (i = 0; i < M; i++) { 615 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 616 for (j = ai[i]; j < ai[i + 1]; j++) { 617 if (aj[j] < i) continue; 618 val[nz++] = av[j]; 619 } 620 } else { 621 rnz = ai[i + 1] - adiag[i]; 622 v1 = av + adiag[i]; 623 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 624 } 625 } 626 } else { 627 for (i = 0; i < M; i++) { 628 rnz = ai[i + 1] - adiag[i]; 629 v1 = av + adiag[i]; 630 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 631 } 632 } 633 } 634 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 635 PetscFunctionReturn(PETSC_SUCCESS); 636 } 637 638 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 639 { 640 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 641 PetscInt bs; 642 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 643 PetscMUMPSInt *row, *col; 644 const PetscScalar *av, *bv, *v1, *v2; 645 PetscScalar *val; 646 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 647 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 648 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 649 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 650 #if defined(PETSC_USE_COMPLEX) 651 PetscBool hermitian, isset; 652 #endif 653 654 PetscFunctionBegin; 655 #if defined(PETSC_USE_COMPLEX) 656 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 657 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 658 #endif 659 PetscCall(MatGetBlockSize(A, &bs)); 660 rstart = A->rmap->rstart; 661 ai = aa->i; 662 aj = aa->j; 663 bi = bb->i; 664 bj = bb->j; 665 av = aa->a; 666 bv = bb->a; 667 668 garray = mat->garray; 669 670 if (reuse == MAT_INITIAL_MATRIX) { 671 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 672 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 673 PetscCall(PetscMalloc1(nz, &val)); 674 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 675 mumps->irn = row; 676 mumps->jcn = col; 677 mumps->val = mumps->val_alloc = val; 678 } else { 679 val = mumps->val; 680 } 681 682 jj = 0; 683 irow = rstart; 684 for (i = 0; i < mbs; i++) { 685 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 686 countA = ai[i + 1] - ai[i]; 687 countB = bi[i + 1] - bi[i]; 688 bjj = bj + bi[i]; 689 v1 = av + ai[i] * bs2; 690 v2 = bv + bi[i] * bs2; 691 692 if (bs > 1) { 693 /* A-part */ 694 for (j = 0; j < countA; j++) { 695 for (k = 0; k < bs; k++) { 696 for (m = 0; m < bs; m++) { 697 if (rstart + ajj[j] * bs > irow || k >= m) { 698 if (reuse == MAT_INITIAL_MATRIX) { 699 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 700 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 701 } 702 val[jj++] = v1[j * bs2 + m + k * bs]; 703 } 704 } 705 } 706 } 707 708 /* B-part */ 709 for (j = 0; j < countB; j++) { 710 for (k = 0; k < bs; k++) { 711 for (m = 0; m < bs; m++) { 712 if (reuse == MAT_INITIAL_MATRIX) { 713 PetscCall(PetscMUMPSIntCast(irow + m + shift, 
&row[jj])); 714 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 715 } 716 val[jj++] = v2[j * bs2 + m + k * bs]; 717 } 718 } 719 } 720 } else { 721 /* A-part */ 722 for (j = 0; j < countA; j++) { 723 if (reuse == MAT_INITIAL_MATRIX) { 724 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 725 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 726 } 727 val[jj++] = v1[j]; 728 } 729 730 /* B-part */ 731 for (j = 0; j < countB; j++) { 732 if (reuse == MAT_INITIAL_MATRIX) { 733 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 734 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 735 } 736 val[jj++] = v2[j]; 737 } 738 } 739 irow += bs; 740 } 741 mumps->nnz = jj; 742 PetscFunctionReturn(PETSC_SUCCESS); 743 } 744 745 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 746 { 747 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 748 PetscInt64 rstart, nz, i, j, jj, irow, countA, countB; 749 PetscMUMPSInt *row, *col; 750 const PetscScalar *av, *bv, *v1, *v2; 751 PetscScalar *val; 752 Mat Ad, Ao; 753 Mat_SeqAIJ *aa; 754 Mat_SeqAIJ *bb; 755 756 PetscFunctionBegin; 757 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 758 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 759 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 760 761 aa = (Mat_SeqAIJ *)(Ad)->data; 762 bb = (Mat_SeqAIJ *)(Ao)->data; 763 ai = aa->i; 764 aj = aa->j; 765 bi = bb->i; 766 bj = bb->j; 767 768 rstart = A->rmap->rstart; 769 770 if (reuse == MAT_INITIAL_MATRIX) { 771 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 772 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 773 PetscCall(PetscMalloc1(nz, &val)); 774 mumps->nnz = nz; 775 mumps->irn = row; 776 mumps->jcn = col; 777 mumps->val = mumps->val_alloc = val; 778 } else { 779 val = mumps->val; 780 } 781 782 jj = 0; 783 irow = rstart; 784 for (i = 0; i < m; i++) { 785 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 786 countA = ai[i + 1] - ai[i]; 787 countB = bi[i + 1] - bi[i]; 788 bjj = bj + bi[i]; 789 v1 = av + ai[i]; 790 v2 = bv + bi[i]; 791 792 /* A-part */ 793 for (j = 0; j < countA; j++) { 794 if (reuse == MAT_INITIAL_MATRIX) { 795 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 796 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 797 } 798 val[jj++] = v1[j]; 799 } 800 801 /* B-part */ 802 for (j = 0; j < countB; j++) { 803 if (reuse == MAT_INITIAL_MATRIX) { 804 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 805 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 806 } 807 val[jj++] = v2[j]; 808 } 809 irow++; 810 } 811 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 812 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 813 PetscFunctionReturn(PETSC_SUCCESS); 814 } 815 816 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 817 { 818 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 819 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 820 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 821 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 822 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart; 823 const PetscInt bs2 = mat->bs2; 824 PetscInt bs; 825 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 826 PetscMUMPSInt *row, *col; 827 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 828 PetscScalar *val; 829 830 PetscFunctionBegin; 831 PetscCall(MatGetBlockSize(A, 
&bs)); 832 if (reuse == MAT_INITIAL_MATRIX) { 833 nz = bs2 * (aa->nz + bb->nz); 834 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 835 PetscCall(PetscMalloc1(nz, &val)); 836 mumps->nnz = nz; 837 mumps->irn = row; 838 mumps->jcn = col; 839 mumps->val = mumps->val_alloc = val; 840 } else { 841 val = mumps->val; 842 } 843 844 jj = 0; 845 irow = rstart; 846 for (i = 0; i < mbs; i++) { 847 countA = ai[i + 1] - ai[i]; 848 countB = bi[i + 1] - bi[i]; 849 ajj = aj + ai[i]; 850 bjj = bj + bi[i]; 851 v1 = av + bs2 * ai[i]; 852 v2 = bv + bs2 * bi[i]; 853 854 idx = 0; 855 /* A-part */ 856 for (k = 0; k < countA; k++) { 857 for (j = 0; j < bs; j++) { 858 for (n = 0; n < bs; n++) { 859 if (reuse == MAT_INITIAL_MATRIX) { 860 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 861 PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj])); 862 } 863 val[jj++] = v1[idx++]; 864 } 865 } 866 } 867 868 idx = 0; 869 /* B-part */ 870 for (k = 0; k < countB; k++) { 871 for (j = 0; j < bs; j++) { 872 for (n = 0; n < bs; n++) { 873 if (reuse == MAT_INITIAL_MATRIX) { 874 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 875 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 876 } 877 val[jj++] = v2[idx++]; 878 } 879 } 880 } 881 irow += bs; 882 } 883 PetscFunctionReturn(PETSC_SUCCESS); 884 } 885 886 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 887 { 888 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 889 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 890 PetscMUMPSInt *row, *col; 891 const PetscScalar *av, *bv, *v1, *v2; 892 PetscScalar *val; 893 Mat Ad, Ao; 894 Mat_SeqAIJ *aa; 895 Mat_SeqAIJ *bb; 896 #if defined(PETSC_USE_COMPLEX) 897 PetscBool hermitian, isset; 898 #endif 899 900 PetscFunctionBegin; 901 #if defined(PETSC_USE_COMPLEX) 902 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 903 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 904 #endif 905 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 906 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 907 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 908 909 aa = (Mat_SeqAIJ *)(Ad)->data; 910 bb = (Mat_SeqAIJ *)(Ao)->data; 911 ai = aa->i; 912 aj = aa->j; 913 adiag = aa->diag; 914 bi = bb->i; 915 bj = bb->j; 916 917 rstart = A->rmap->rstart; 918 919 if (reuse == MAT_INITIAL_MATRIX) { 920 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 921 nzb = 0; /* num of upper triangular entries in mat->B */ 922 for (i = 0; i < m; i++) { 923 nza += (ai[i + 1] - adiag[i]); 924 countB = bi[i + 1] - bi[i]; 925 bjj = bj + bi[i]; 926 for (j = 0; j < countB; j++) { 927 if (garray[bjj[j]] > rstart) nzb++; 928 } 929 } 930 931 nz = nza + nzb; /* total nz of upper triangular part of mat */ 932 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 933 PetscCall(PetscMalloc1(nz, &val)); 934 mumps->nnz = nz; 935 mumps->irn = row; 936 mumps->jcn = col; 937 mumps->val = mumps->val_alloc = val; 938 } else { 939 val = mumps->val; 940 } 941 942 jj = 0; 943 irow = rstart; 944 for (i = 0; i < m; i++) { 945 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 946 v1 = av + adiag[i]; 947 countA = ai[i + 1] - adiag[i]; 948 countB = bi[i + 1] - bi[i]; 949 bjj = bj + bi[i]; 950 v2 = bv + bi[i]; 951 952 /* A-part */ 953 for (j = 0; j < countA; j++) { 954 if (reuse == MAT_INITIAL_MATRIX) { 955 
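          /* on the first conversion record the global (row, col) indices; on reuse the pattern is unchanged and only val[] is refilled */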
PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 956 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 957 } 958 val[jj++] = v1[j]; 959 } 960 961 /* B-part */ 962 for (j = 0; j < countB; j++) { 963 if (garray[bjj[j]] > rstart) { 964 if (reuse == MAT_INITIAL_MATRIX) { 965 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 966 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 967 } 968 val[jj++] = v2[j]; 969 } 970 } 971 irow++; 972 } 973 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 974 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 975 PetscFunctionReturn(PETSC_SUCCESS); 976 } 977 978 PetscErrorCode MatDestroy_MUMPS(Mat A) 979 { 980 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 981 982 PetscFunctionBegin; 983 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 984 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 985 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 986 PetscCall(VecDestroy(&mumps->b_seq)); 987 PetscCall(VecDestroy(&mumps->x_seq)); 988 PetscCall(PetscFree(mumps->id.perm_in)); 989 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 990 PetscCall(PetscFree(mumps->val_alloc)); 991 PetscCall(PetscFree(mumps->info)); 992 PetscCall(PetscFree(mumps->ICNTL_pre)); 993 PetscCall(PetscFree(mumps->CNTL_pre)); 994 PetscCall(MatMumpsResetSchur_Private(mumps)); 995 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 996 mumps->id.job = JOB_END; 997 PetscMUMPS_c(mumps); 998 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 999 if (mumps->mumps_comm != MPI_COMM_NULL) { 1000 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1001 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1002 } 1003 } 1004 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1005 if (mumps->use_petsc_omp_support) { 1006 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1007 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1008 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1009 } 1010 #endif 1011 PetscCall(PetscFree(mumps->ia_alloc)); 1012 PetscCall(PetscFree(mumps->ja_alloc)); 1013 PetscCall(PetscFree(mumps->recvcount)); 1014 PetscCall(PetscFree(mumps->reqs)); 1015 PetscCall(PetscFree(mumps->irhs_loc)); 1016 PetscCall(PetscFree(A->data)); 1017 1018 /* clear composed functions */ 1019 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1020 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1021 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1022 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1023 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1024 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1025 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1026 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1027 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1028 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1029 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1030 
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1031 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1032 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1033 PetscFunctionReturn(PETSC_SUCCESS); 1034 } 1035 1036 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1037 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1038 { 1039 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1040 const PetscMPIInt ompsize = mumps->omp_comm_size; 1041 PetscInt i, m, M, rstart; 1042 1043 PetscFunctionBegin; 1044 PetscCall(MatGetSize(A, &M, NULL)); 1045 PetscCall(MatGetLocalSize(A, &m, NULL)); 1046 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1047 if (ompsize == 1) { 1048 if (!mumps->irhs_loc) { 1049 mumps->nloc_rhs = m; 1050 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1051 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1052 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1053 } 1054 mumps->id.rhs_loc = (MumpsScalar *)array; 1055 } else { 1056 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1057 const PetscInt *ranges; 1058 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1059 MPI_Group petsc_group, omp_group; 1060 PetscScalar *recvbuf = NULL; 1061 1062 if (mumps->is_omp_master) { 1063 /* Lazily initialize the omp stuff for distributed rhs */ 1064 if (!mumps->irhs_loc) { 1065 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1066 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1067 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1068 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1069 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1070 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1071 1072 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1073 mumps->nloc_rhs = 0; 1074 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1075 for (j = 0; j < ompsize; j++) { 1076 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1077 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1078 } 1079 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1080 for (j = k = 0; j < ompsize; j++) { 1081 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1082 } 1083 1084 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1085 PetscCallMPI(MPI_Group_free(&petsc_group)); 1086 PetscCallMPI(MPI_Group_free(&omp_group)); 1087 } 1088 1089 /* Realloc buffers when current nrhs is bigger than what we have met */ 1090 if (nrhs > mumps->max_nrhs) { 1091 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1092 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1093 mumps->max_nrhs = nrhs; 1094 } 1095 1096 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1097 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1098 mumps->rhs_disps[0] = 0; 1099 for (j = 1; j < ompsize; j++) { 1100 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1101 PetscCheck(mumps->rhs_disps[j] >= 
0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1102 } 1103 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1104 } 1105 1106 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1107 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1108 1109 if (mumps->is_omp_master) { 1110 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1111 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1112 for (j = 0; j < ompsize; j++) { 1113 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1114 dst = dstbase; 1115 for (i = 0; i < nrhs; i++) { 1116 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1117 src += mumps->rhs_nrow[j]; 1118 dst += mumps->nloc_rhs; 1119 } 1120 dstbase += mumps->rhs_nrow[j]; 1121 } 1122 } 1123 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1124 } 1125 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1126 } 1127 mumps->id.nrhs = nrhs; 1128 mumps->id.nloc_rhs = mumps->nloc_rhs; 1129 mumps->id.lrhs_loc = mumps->nloc_rhs; 1130 mumps->id.irhs_loc = mumps->irhs_loc; 1131 PetscFunctionReturn(PETSC_SUCCESS); 1132 } 1133 1134 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1135 { 1136 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1137 const PetscScalar *rarray = NULL; 1138 PetscScalar *array; 1139 IS is_iden, is_petsc; 1140 PetscInt i; 1141 PetscBool second_solve = PETSC_FALSE; 1142 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1143 1144 PetscFunctionBegin; 1145 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1146 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1147 &cite1)); 1148 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1149 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1150 &cite2)); 1151 1152 if (A->factorerrortype) { 1153 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1154 PetscCall(VecSetInf(x)); 1155 PetscFunctionReturn(PETSC_SUCCESS); 1156 } 1157 1158 mumps->id.nrhs = 1; 1159 if (mumps->petsc_size > 1) { 1160 if (mumps->ICNTL20 == 10) { 1161 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1162 PetscCall(VecGetArrayRead(b, &rarray)); 1163 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1164 } else { 1165 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1166 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1167 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1168 if (!mumps->myid) { 1169 PetscCall(VecGetArray(mumps->b_seq, &array)); 1170 mumps->id.rhs = (MumpsScalar *)array; 1171 } 1172 } 1173 } else { /* petsc_size == 1 */ 1174 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1175 PetscCall(VecCopy(b, x)); 1176 PetscCall(VecGetArray(x, &array)); 1177 mumps->id.rhs = (MumpsScalar *)array; 1178 } 1179 1180 /* 1181 handle condensation step of Schur complement (if any) 1182 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1183 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1184 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1185 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1186 */ 1187 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1188 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1189 second_solve = PETSC_TRUE; 1190 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1191 } 1192 /* solve phase */ 1193 mumps->id.job = JOB_SOLVE; 1194 PetscMUMPS_c(mumps); 1195 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1196 1197 /* handle expansion step of Schur complement (if any) */ 1198 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1199 1200 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1201 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1202 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1203 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1204 } 1205 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1206 PetscInt *isol2_loc = NULL; 1207 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1208 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1209 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1210 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1211 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1212 PetscCall(ISDestroy(&is_iden)); 1213 
PetscCall(ISDestroy(&is_petsc)); 1214 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1215 } 1216 1217 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1218 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1219 } 1220 1221 if (mumps->petsc_size > 1) { 1222 if (mumps->ICNTL20 == 10) { 1223 PetscCall(VecRestoreArrayRead(b, &rarray)); 1224 } else if (!mumps->myid) { 1225 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1226 } 1227 } else PetscCall(VecRestoreArray(x, &array)); 1228 1229 PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3))); 1230 PetscFunctionReturn(PETSC_SUCCESS); 1231 } 1232 1233 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1234 { 1235 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1236 1237 PetscFunctionBegin; 1238 mumps->id.ICNTL(9) = 0; 1239 PetscCall(MatSolve_MUMPS(A, b, x)); 1240 mumps->id.ICNTL(9) = 1; 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1245 { 1246 Mat Bt = NULL; 1247 PetscBool denseX, denseB, flg, flgT; 1248 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1249 PetscInt i, nrhs, M; 1250 PetscScalar *array; 1251 const PetscScalar *rbray; 1252 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1253 PetscMUMPSInt *isol_loc, *isol_loc_save; 1254 PetscScalar *bray, *sol_loc, *sol_loc_save; 1255 IS is_to, is_from; 1256 PetscInt k, proc, j, m, myrstart; 1257 const PetscInt *rstart; 1258 Vec v_mpi, msol_loc; 1259 VecScatter scat_sol; 1260 Vec b_seq; 1261 VecScatter scat_rhs; 1262 PetscScalar *aa; 1263 PetscInt spnr, *ia, *ja; 1264 Mat_MPIAIJ *b = NULL; 1265 1266 PetscFunctionBegin; 1267 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1268 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1269 1270 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1271 if (denseB) { 1272 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1273 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1274 } else { /* sparse B */ 1275 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1276 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1277 if (flgT) { /* input B is transpose of actual RHS matrix, 1278 because mumps requires sparse compressed COLUMN storage! 
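                 (B^T stored in PETSc's compressed-row AIJ format is exactly the compressed-column representation of B that MUMPS reads through irhs_ptr/irhs_sparse.)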
See MatMatTransposeSolve_MUMPS() */ 1279 PetscCall(MatTransposeGetMat(B, &Bt)); 1280 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1281 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1282 } 1283 1284 PetscCall(MatGetSize(B, &M, &nrhs)); 1285 mumps->id.nrhs = nrhs; 1286 mumps->id.lrhs = M; 1287 mumps->id.rhs = NULL; 1288 1289 if (mumps->petsc_size == 1) { 1290 PetscScalar *aa; 1291 PetscInt spnr, *ia, *ja; 1292 PetscBool second_solve = PETSC_FALSE; 1293 1294 PetscCall(MatDenseGetArray(X, &array)); 1295 mumps->id.rhs = (MumpsScalar *)array; 1296 1297 if (denseB) { 1298 /* copy B to X */ 1299 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1300 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1301 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1302 } else { /* sparse B */ 1303 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1304 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1305 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1306 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1307 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1308 } 1309 /* handle condensation step of Schur complement (if any) */ 1310 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1311 second_solve = PETSC_TRUE; 1312 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1313 } 1314 /* solve phase */ 1315 mumps->id.job = JOB_SOLVE; 1316 PetscMUMPS_c(mumps); 1317 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1318 1319 /* handle expansion step of Schur complement (if any) */ 1320 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1321 if (!denseB) { /* sparse B */ 1322 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1323 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1324 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1325 } 1326 PetscCall(MatDenseRestoreArray(X, &array)); 1327 PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 1330 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
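     (When B is dense and mumps->ICNTL20 == 10, a distributed dense rhs is used instead and no gathering onto the host is needed.)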
*/ 1331 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1332 1333 /* create msol_loc to hold mumps local solution */ 1334 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1335 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1336 1337 lsol_loc = mumps->id.lsol_loc; 1338 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1339 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1340 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1341 mumps->id.isol_loc = isol_loc; 1342 1343 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1344 1345 if (denseB) { 1346 if (mumps->ICNTL20 == 10) { 1347 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1348 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1349 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1350 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1351 PetscCall(MatGetLocalSize(B, &m, NULL)); 1352 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1353 } else { 1354 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1355 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1356 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1357 0, re-arrange B into desired order, which is a local operation. 1358 */ 1359 1360 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1361 /* wrap dense rhs matrix B into a vector v_mpi */ 1362 PetscCall(MatGetLocalSize(B, &m, NULL)); 1363 PetscCall(MatDenseGetArray(B, &bray)); 1364 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1365 PetscCall(MatDenseRestoreArray(B, &bray)); 1366 1367 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
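         The index array idx built below maps each process's column-major block of B onto the column-major M-by-nrhs array b_seq that is handed to MUMPS as the centralized rhs on the host.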
*/ 1368 if (!mumps->myid) { 1369 PetscInt *idx; 1370 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1371 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1372 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1373 k = 0; 1374 for (proc = 0; proc < mumps->petsc_size; proc++) { 1375 for (j = 0; j < nrhs; j++) { 1376 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1377 } 1378 } 1379 1380 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1381 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1382 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1383 } else { 1384 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1385 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1386 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1387 } 1388 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1389 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1390 PetscCall(ISDestroy(&is_to)); 1391 PetscCall(ISDestroy(&is_from)); 1392 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1393 1394 if (!mumps->myid) { /* define rhs on the host */ 1395 PetscCall(VecGetArray(b_seq, &bray)); 1396 mumps->id.rhs = (MumpsScalar *)bray; 1397 PetscCall(VecRestoreArray(b_seq, &bray)); 1398 } 1399 } 1400 } else { /* sparse B */ 1401 b = (Mat_MPIAIJ *)Bt->data; 1402 1403 /* wrap dense X into a vector v_mpi */ 1404 PetscCall(MatGetLocalSize(X, &m, NULL)); 1405 PetscCall(MatDenseGetArray(X, &bray)); 1406 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1407 PetscCall(MatDenseRestoreArray(X, &bray)); 1408 1409 if (!mumps->myid) { 1410 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1411 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1412 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1413 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1414 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1415 } else { 1416 mumps->id.irhs_ptr = NULL; 1417 mumps->id.irhs_sparse = NULL; 1418 mumps->id.nz_rhs = 0; 1419 mumps->id.rhs_sparse = NULL; 1420 } 1421 } 1422 1423 /* solve phase */ 1424 mumps->id.job = JOB_SOLVE; 1425 PetscMUMPS_c(mumps); 1426 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1427 1428 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1429 PetscCall(MatDenseGetArray(X, &array)); 1430 PetscCall(VecPlaceArray(v_mpi, array)); 1431 1432 /* create scatter scat_sol */ 1433 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1434 /* iidx: index for scatter mumps solution to petsc X */ 1435 1436 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1437 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1438 for (i = 0; i < lsol_loc; i++) { 1439 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1440 1441 for (proc = 0; proc < mumps->petsc_size; proc++) { 1442 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1443 myrstart = rstart[proc]; 1444 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1445 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1446 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1447 break; 1448 } 1449 } 1450 1451 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1452 } 1453 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1454 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1455 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1456 PetscCall(ISDestroy(&is_from)); 1457 PetscCall(ISDestroy(&is_to)); 1458 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1459 PetscCall(MatDenseRestoreArray(X, &array)); 1460 1461 /* free spaces */ 1462 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1463 mumps->id.isol_loc = isol_loc_save; 1464 1465 PetscCall(PetscFree2(sol_loc, isol_loc)); 1466 PetscCall(PetscFree(idxx)); 1467 PetscCall(VecDestroy(&msol_loc)); 1468 PetscCall(VecDestroy(&v_mpi)); 1469 if (!denseB) { 1470 if (!mumps->myid) { 1471 b = (Mat_MPIAIJ *)Bt->data; 1472 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1473 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1474 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1475 } 1476 } else { 1477 if (mumps->ICNTL20 == 0) { 1478 PetscCall(VecDestroy(&b_seq)); 1479 PetscCall(VecScatterDestroy(&scat_rhs)); 1480 } 1481 } 1482 PetscCall(VecScatterDestroy(&scat_sol)); 1483 PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3))); 1484 PetscFunctionReturn(PETSC_SUCCESS); 1485 } 1486 1487 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1488 { 1489 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1490 PetscMUMPSInt oldvalue = mumps->id.ICNTL(9); 1491 1492 PetscFunctionBegin; 1493 mumps->id.ICNTL(9) = 0; 1494 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1495 mumps->id.ICNTL(9) = oldvalue; 1496 PetscFunctionReturn(PETSC_SUCCESS); 1497 } 1498 1499 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1500 { 1501 PetscBool flg; 1502 Mat B; 1503 1504 PetscFunctionBegin; 1505 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1506 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1507 1508 /* Create B=Bt^T that uses Bt's data structure */ 1509 PetscCall(MatCreateTranspose(Bt, &B)); 1510 1511 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1512 PetscCall(MatDestroy(&B)); 1513 PetscFunctionReturn(PETSC_SUCCESS); 1514 } 1515 1516 #if !defined(PETSC_USE_COMPLEX) 1517 /* 1518 input: 1519 F: numeric factor 1520 output: 1521 nneg: total number of negative pivots 1522 nzero: total number of zero pivots 1523 npos: (global dimension of F) - nneg - nzero 1524 */ 1525 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1526 { 1527 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1528 PetscMPIInt size; 1529 1530 PetscFunctionBegin; 1531 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1532 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility 
to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1533 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1534 1535 if (nneg) *nneg = mumps->id.INFOG(12); 1536 if (nzero || npos) { 1537 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1538 if (nzero) *nzero = mumps->id.INFOG(28); 1539 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1540 } 1541 PetscFunctionReturn(PETSC_SUCCESS); 1542 } 1543 #endif 1544 1545 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1546 { 1547 PetscInt i, nreqs; 1548 PetscMUMPSInt *irn, *jcn; 1549 PetscMPIInt count; 1550 PetscInt64 totnnz, remain; 1551 const PetscInt osize = mumps->omp_comm_size; 1552 PetscScalar *val; 1553 1554 PetscFunctionBegin; 1555 if (osize > 1) { 1556 if (reuse == MAT_INITIAL_MATRIX) { 1557 /* master first gathers counts of nonzeros to receive */ 1558 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1559 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1560 1561 /* Then each computes number of send/recvs */ 1562 if (mumps->is_omp_master) { 1563 /* Start from 1 since self communication is not done in MPI */ 1564 nreqs = 0; 1565 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1566 } else { 1567 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1568 } 1569 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1570 1571 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1572 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1573 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1574 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
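     A worked example with made-up counts: if rank 1 of omp_comm contributed recvcount[1] = 5,000,000,000 triplets,
     the master loop below posts ceil(5e9 / 2147483647) = 3 receives for each of irn/jcn/val, of lengths
     2147483647, 2147483647 and 705,032,706, advancing the destination pointers by count entries after each post.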
1575 */ 1576 nreqs = 0; /* counter for actual send/recvs */ 1577 if (mumps->is_omp_master) { 1578 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1579 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1580 PetscCall(PetscMalloc1(totnnz, &val)); 1581 1582 /* Self communication */ 1583 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1584 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1585 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1586 1587 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1588 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1589 PetscCall(PetscFree(mumps->val_alloc)); 1590 mumps->nnz = totnnz; 1591 mumps->irn = irn; 1592 mumps->jcn = jcn; 1593 mumps->val = mumps->val_alloc = val; 1594 1595 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1596 jcn += mumps->recvcount[0]; 1597 val += mumps->recvcount[0]; 1598 1599 /* Remote communication */ 1600 for (i = 1; i < osize; i++) { 1601 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1602 remain = mumps->recvcount[i] - count; 1603 while (count > 0) { 1604 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1605 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1606 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1607 irn += count; 1608 jcn += count; 1609 val += count; 1610 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1611 remain -= count; 1612 } 1613 } 1614 } else { 1615 irn = mumps->irn; 1616 jcn = mumps->jcn; 1617 val = mumps->val; 1618 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1619 remain = mumps->nnz - count; 1620 while (count > 0) { 1621 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1622 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1623 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1624 irn += count; 1625 jcn += count; 1626 val += count; 1627 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1628 remain -= count; 1629 } 1630 } 1631 } else { 1632 nreqs = 0; 1633 if (mumps->is_omp_master) { 1634 val = mumps->val + mumps->recvcount[0]; 1635 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1636 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1637 remain = mumps->recvcount[i] - count; 1638 while (count > 0) { 1639 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1640 val += count; 1641 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1642 remain -= count; 1643 } 1644 } 1645 } else { 1646 val = mumps->val; 1647 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1648 remain = mumps->nnz - count; 1649 while (count > 0) { 1650 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1651 val += count; 1652 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1653 remain -= count; 1654 } 1655 } 1656 } 1657 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1658 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1659 } 1660 PetscFunctionReturn(PETSC_SUCCESS); 1661 } 1662 1663 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const 
MatFactorInfo *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
  PetscBool  isMPIAIJ;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
    if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));

  /* numerical factorization phase */
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
  } else {
    mumps->id.a_loc = (MumpsScalar *)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
    if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
      PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
    } else if (mumps->id.INFOG(1) == -13) {
      PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
    } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
      PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
    } else {
      PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OTHER;
    }
  }
  PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " mumps->id.ICNTL(16):=%d", mumps->id.INFOG(16));

  F->assembled = PETSC_TRUE;

  if (F->schur) { /* reset Schur status to unfactored */
#if defined(PETSC_HAVE_CUDA)
    F->schur->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
    }
    PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
  }

  /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt     lsol_loc;
    PetscScalar *sol_loc;

    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));

    /* distributed solution; Create x_seq=sol_loc for repeated use */
    if (mumps->x_seq) {
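      /* a previous numeric factorization already created the distributed-solution objects; free the old
         scatter and the old sol_loc/isol_loc buffers before reallocating them with the new INFO(23) length below */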
PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1730 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1731 PetscCall(VecDestroy(&mumps->x_seq)); 1732 } 1733 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1734 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1735 mumps->id.lsol_loc = lsol_loc; 1736 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1737 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1738 } 1739 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1740 PetscFunctionReturn(PETSC_SUCCESS); 1741 } 1742 1743 /* Sets MUMPS options from the options database */ 1744 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1745 { 1746 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1747 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1748 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1749 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1750 MumpsScalar *arr; 1751 1752 PetscFunctionBegin; 1753 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1754 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1755 PetscInt nthreads = 0; 1756 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1757 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1758 1759 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1760 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1761 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1762 1763 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1764 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1765 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1766 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1767 if (mumps->use_petsc_omp_support) { 1768 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 1769 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1770 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1771 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1772 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1773 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1774 #endif 1775 } else { 1776 mumps->omp_comm = PETSC_COMM_SELF; 1777 mumps->mumps_comm = mumps->petsc_comm; 1778 mumps->is_omp_master = PETSC_TRUE; 1779 } 1780 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1781 mumps->reqs = NULL; 1782 mumps->tag = 0; 1783 1784 if (mumps->mumps_comm != MPI_COMM_NULL) { 1785 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1786 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
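           A duplicated communicator gives MUMPS its own tag space, so its internal messages cannot collide with
           traffic PETSc posts on petsc_comm. The non-OpenMP branch below gets the same isolation from
           PetscCommGetComm(), which returns a PETSc-managed duplicate of petsc_comm from an internal pool.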
*/ 1787 MPI_Comm comm; 1788 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 1789 mumps->mumps_comm = comm; 1790 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 1791 } 1792 1793 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1794 mumps->id.job = JOB_INIT; 1795 mumps->id.par = 1; /* host participates factorizaton and solve */ 1796 mumps->id.sym = mumps->sym; 1797 1798 size = mumps->id.size_schur; 1799 arr = mumps->id.schur; 1800 listvar_schur = mumps->id.listvar_schur; 1801 PetscMUMPS_c(mumps); 1802 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1803 /* restore cached ICNTL and CNTL values */ 1804 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 1805 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 1806 PetscCall(PetscFree(mumps->ICNTL_pre)); 1807 PetscCall(PetscFree(mumps->CNTL_pre)); 1808 1809 if (schur) { 1810 mumps->id.size_schur = size; 1811 mumps->id.schur_lld = size; 1812 mumps->id.schur = arr; 1813 mumps->id.listvar_schur = listvar_schur; 1814 if (mumps->petsc_size > 1) { 1815 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 1816 1817 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1818 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1819 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 1820 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 1821 } else { 1822 if (F->factortype == MAT_FACTOR_LU) { 1823 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1824 } else { 1825 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1826 } 1827 } 1828 mumps->id.ICNTL(26) = -1; 1829 } 1830 1831 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1832 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
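     For instance, with -mat_mumps_use_omp_threads and, say, 8 MPI ranks mapped onto 2 omp masters, only the
     2 masters ever call MUMPS, yet all 8 ranks execute MatSolve_MUMPS() and read ICNTL(9) there; without this
     broadcast the non-master ranks would see the zero-initialized struct fields instead of the MUMPS defaults.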
1833 */ 1834 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 1835 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 1836 1837 mumps->scat_rhs = NULL; 1838 mumps->scat_sol = NULL; 1839 1840 /* set PETSc-MUMPS default options - override MUMPS default */ 1841 mumps->id.ICNTL(3) = 0; 1842 mumps->id.ICNTL(4) = 0; 1843 if (mumps->petsc_size == 1) { 1844 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1845 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1846 } else { 1847 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1848 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1849 } 1850 } 1851 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 1852 if (flg) mumps->id.ICNTL(1) = icntl; 1853 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 1854 if (flg) mumps->id.ICNTL(2) = icntl; 1855 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 1856 if (flg) mumps->id.ICNTL(3) = icntl; 1857 1858 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 1859 if (flg) mumps->id.ICNTL(4) = icntl; 1860 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1861 1862 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 1863 if (flg) mumps->id.ICNTL(6) = icntl; 1864 1865 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 1866 if (flg) { 1867 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1868 mumps->id.ICNTL(7) = icntl; 1869 } 1870 1871 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 1872 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 1873 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 1874 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 1875 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 1876 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 1877 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 1878 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 1879 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 1880 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 1881 if (flg) { 1882 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 1883 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 1884 } 1885 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 1886 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1887 PetscCall(MatDestroy(&F->schur)); 1888 PetscCall(MatMumpsResetSchur_Private(mumps)); 1889 } 1890 1891 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1892 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1893 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1894 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1895 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1896 In short, we could not use distributed RHS with MPICH until v4.0b1. 
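     Whichever default is picked below can still be overridden at run time; e.g. -mat_mumps_icntl_20 0 forces
     centralized dense right-hand sides again, a useful workaround if an MPI implementation shows problems like
     the ones described above (only the values 0 and 10 are accepted by this interface).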
1897 */ 1898 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 1899 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 1900 #else 1901 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 1902 #endif 1903 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 1904 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 1905 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 1906 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 1907 #endif 1908 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 1909 1910 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 1911 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 1912 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 1913 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 1914 1915 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 1916 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 1917 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 1918 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 1919 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 1920 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 1921 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 1922 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 1923 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", 
mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));

  PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));

  PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));

  PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
  if (ninfo) {
    PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
    PetscCall(PetscMalloc1(ninfo, &mumps->info));
    mumps->ninfo = ninfo;
    for (i = 0; i < ninfo; i++) {
      PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", ninfo);
      mumps->info[i] = info[i];
    }
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0) {
    PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
    if (mumps->id.INFOG(1) == -6) {
      PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
    } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
      PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
    } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
      PetscCall(PetscInfo(F, "Empty matrix\n"));
    } else {
      PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OTHER;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS
r, IS c, const MatFactorInfo *info) 1973 { 1974 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1975 Vec b; 1976 const PetscInt M = A->rmap->N; 1977 1978 PetscFunctionBegin; 1979 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1980 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1981 PetscFunctionReturn(PETSC_SUCCESS); 1982 } 1983 1984 /* Set MUMPS options from the options database */ 1985 PetscCall(MatSetFromOptions_MUMPS(F, A)); 1986 1987 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 1988 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 1989 1990 /* analysis phase */ 1991 mumps->id.job = JOB_FACTSYMBOLIC; 1992 mumps->id.n = M; 1993 switch (mumps->id.ICNTL(18)) { 1994 case 0: /* centralized assembled matrix input */ 1995 if (!mumps->myid) { 1996 mumps->id.nnz = mumps->nnz; 1997 mumps->id.irn = mumps->irn; 1998 mumps->id.jcn = mumps->jcn; 1999 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2000 if (r) { 2001 mumps->id.ICNTL(7) = 1; 2002 if (!mumps->myid) { 2003 const PetscInt *idx; 2004 PetscInt i; 2005 2006 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2007 PetscCall(ISGetIndices(r, &idx)); 2008 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2009 PetscCall(ISRestoreIndices(r, &idx)); 2010 } 2011 } 2012 } 2013 break; 2014 case 3: /* distributed assembled matrix input (size>1) */ 2015 mumps->id.nnz_loc = mumps->nnz; 2016 mumps->id.irn_loc = mumps->irn; 2017 mumps->id.jcn_loc = mumps->jcn; 2018 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2019 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2020 PetscCall(MatCreateVecs(A, NULL, &b)); 2021 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2022 PetscCall(VecDestroy(&b)); 2023 } 2024 break; 2025 } 2026 PetscMUMPS_c(mumps); 2027 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2028 2029 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2030 F->ops->solve = MatSolve_MUMPS; 2031 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2032 F->ops->matsolve = MatMatSolve_MUMPS; 2033 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2034 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2035 2036 mumps->matstruc = SAME_NONZERO_PATTERN; 2037 PetscFunctionReturn(PETSC_SUCCESS); 2038 } 2039 2040 /* Note the Petsc r and c permutations are ignored */ 2041 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2042 { 2043 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2044 Vec b; 2045 const PetscInt M = A->rmap->N; 2046 2047 PetscFunctionBegin; 2048 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2049 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2050 PetscFunctionReturn(PETSC_SUCCESS); 2051 } 2052 2053 /* Set MUMPS options from the options database */ 2054 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2055 2056 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2057 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2058 2059 /* analysis phase */ 2060 mumps->id.job = JOB_FACTSYMBOLIC; 2061 mumps->id.n = M; 2062 switch (mumps->id.ICNTL(18)) { 2063 case 0: /* centralized assembled matrix input */ 2064 if (!mumps->myid) { 2065 mumps->id.nnz = mumps->nnz; 2066 mumps->id.irn = mumps->irn; 2067 mumps->id.jcn = mumps->jcn; 2068 if (mumps->id.ICNTL(6) > 1) mumps->id.a 
= (MumpsScalar *)mumps->val; 2069 } 2070 break; 2071 case 3: /* distributed assembled matrix input (size>1) */ 2072 mumps->id.nnz_loc = mumps->nnz; 2073 mumps->id.irn_loc = mumps->irn; 2074 mumps->id.jcn_loc = mumps->jcn; 2075 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2076 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2077 PetscCall(MatCreateVecs(A, NULL, &b)); 2078 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2079 PetscCall(VecDestroy(&b)); 2080 } 2081 break; 2082 } 2083 PetscMUMPS_c(mumps); 2084 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2085 2086 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2087 F->ops->solve = MatSolve_MUMPS; 2088 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2089 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2090 2091 mumps->matstruc = SAME_NONZERO_PATTERN; 2092 PetscFunctionReturn(PETSC_SUCCESS); 2093 } 2094 2095 /* Note the Petsc r permutation and factor info are ignored */ 2096 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2097 { 2098 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2099 Vec b; 2100 const PetscInt M = A->rmap->N; 2101 2102 PetscFunctionBegin; 2103 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2104 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2105 PetscFunctionReturn(PETSC_SUCCESS); 2106 } 2107 2108 /* Set MUMPS options from the options database */ 2109 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2110 2111 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2112 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2113 2114 /* analysis phase */ 2115 mumps->id.job = JOB_FACTSYMBOLIC; 2116 mumps->id.n = M; 2117 switch (mumps->id.ICNTL(18)) { 2118 case 0: /* centralized assembled matrix input */ 2119 if (!mumps->myid) { 2120 mumps->id.nnz = mumps->nnz; 2121 mumps->id.irn = mumps->irn; 2122 mumps->id.jcn = mumps->jcn; 2123 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2124 } 2125 break; 2126 case 3: /* distributed assembled matrix input (size>1) */ 2127 mumps->id.nnz_loc = mumps->nnz; 2128 mumps->id.irn_loc = mumps->irn; 2129 mumps->id.jcn_loc = mumps->jcn; 2130 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2131 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2132 PetscCall(MatCreateVecs(A, NULL, &b)); 2133 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2134 PetscCall(VecDestroy(&b)); 2135 } 2136 break; 2137 } 2138 PetscMUMPS_c(mumps); 2139 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2140 2141 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2142 F->ops->solve = MatSolve_MUMPS; 2143 F->ops->solvetranspose = MatSolve_MUMPS; 2144 F->ops->matsolve = MatMatSolve_MUMPS; 2145 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2146 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2147 #if defined(PETSC_USE_COMPLEX) 2148 F->ops->getinertia = NULL; 2149 #else 2150 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2151 #endif 2152 2153 mumps->matstruc = SAME_NONZERO_PATTERN; 2154 PetscFunctionReturn(PETSC_SUCCESS); 2155 } 2156 2157 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2158 { 2159 PetscBool iascii; 2160 PetscViewerFormat format; 2161 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2162 2163 PetscFunctionBegin; 2164 /* check if matrix is mumps type */ 2165 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2166 2167 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2168 if (iascii) { 2169 PetscCall(PetscViewerGetFormat(viewer, &format)); 2170 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2171 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2172 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2173 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2174 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2175 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2176 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2177 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2178 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2179 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2180 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2181 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2182 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2183 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2184 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2185 if (mumps->id.ICNTL(11) > 0) { 2186 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2187 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2188 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2189 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2190 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2191 PetscCall(PetscViewerASCIIPrintf(viewer, " 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2192 } 2193 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2194 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2195 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2196 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2197 /* ICNTL(15-17) not used */ 2198 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2199 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2200 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2201 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2202 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2203 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2204 2205 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2206 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2207 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2208 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2209 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2210 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2211 2212 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2213 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2214 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2215 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2216 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2217 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2218 2219 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2220 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2221 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2222 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2223 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2224 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2225 2226 /* information local to 
each processor */ 2227 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2228 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2229 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2230 PetscCall(PetscViewerFlush(viewer)); 2231 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2232 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2233 PetscCall(PetscViewerFlush(viewer)); 2234 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2235 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2236 PetscCall(PetscViewerFlush(viewer)); 2237 2238 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2239 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2240 PetscCall(PetscViewerFlush(viewer)); 2241 2242 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2243 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2244 PetscCall(PetscViewerFlush(viewer)); 2245 2246 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2247 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2248 PetscCall(PetscViewerFlush(viewer)); 2249 2250 if (mumps->ninfo && mumps->ninfo <= 80) { 2251 PetscInt i; 2252 for (i = 0; i < mumps->ninfo; i++) { 2253 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2254 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2255 PetscCall(PetscViewerFlush(viewer)); 2256 } 2257 } 2258 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2259 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2260 2261 if (mumps->myid == 0) { /* information from the host */ 2262 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2263 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2264 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2265 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2266 2267 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2268 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2269 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2270 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2271 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2272 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2273 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2274 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2275 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2276 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2277 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2278 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2279 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2280 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2281 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2282 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2283 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2284 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 
2285 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2286 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2287 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2288 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2289 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2290 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2291 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2292 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2293 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2294 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2295 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2296 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2297 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2298 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2299 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2300 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2301 } 2302 } 2303 } 2304 PetscFunctionReturn(PETSC_SUCCESS); 2305 } 2306 2307 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2308 { 2309 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2310 2311 PetscFunctionBegin; 2312 info->block_size = 1.0; 2313 info->nz_allocated = mumps->id.INFOG(20); 2314 info->nz_used = mumps->id.INFOG(20); 2315 info->nz_unneeded = 0.0; 2316 info->assemblies = 0.0; 2317 info->mallocs = 0.0; 2318 info->memory = 0.0; 2319 info->fill_ratio_given = 0; 2320 info->fill_ratio_needed = 0; 2321 info->factor_mallocs = 0; 2322 PetscFunctionReturn(PETSC_SUCCESS); 2323 } 2324 2325 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2326 { 2327 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2328 const PetscScalar *arr; 2329 const PetscInt *idxs; 2330 
PetscInt size, i; 2331 2332 PetscFunctionBegin; 2333 PetscCall(ISGetLocalSize(is, &size)); 2334 /* Schur complement matrix */ 2335 PetscCall(MatDestroy(&F->schur)); 2336 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2337 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2338 mumps->id.schur = (MumpsScalar *)arr; 2339 mumps->id.size_schur = size; 2340 mumps->id.schur_lld = size; 2341 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2342 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2343 2344 /* MUMPS expects Fortran style indices */ 2345 PetscCall(PetscFree(mumps->id.listvar_schur)); 2346 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2347 PetscCall(ISGetIndices(is, &idxs)); 2348 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2349 PetscCall(ISRestoreIndices(is, &idxs)); 2350 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2351 mumps->id.ICNTL(26) = -1; 2352 PetscFunctionReturn(PETSC_SUCCESS); 2353 } 2354 2355 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2356 { 2357 Mat St; 2358 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2359 PetscScalar *array; 2360 #if defined(PETSC_USE_COMPLEX) 2361 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2362 #endif 2363 2364 PetscFunctionBegin; 2365 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2366 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2367 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2368 PetscCall(MatSetType(St, MATDENSE)); 2369 PetscCall(MatSetUp(St)); 2370 PetscCall(MatDenseGetArray(St, &array)); 2371 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2372 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2373 PetscInt i, j, N = mumps->id.size_schur; 2374 for (i = 0; i < N; i++) { 2375 for (j = 0; j < N; j++) { 2376 #if !defined(PETSC_USE_COMPLEX) 2377 PetscScalar val = mumps->id.schur[i * N + j]; 2378 #else 2379 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2380 #endif 2381 array[j * N + i] = val; 2382 } 2383 } 2384 } else { /* stored by columns */ 2385 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2386 } 2387 } else { /* either full or lower-triangular (not packed) */ 2388 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2389 PetscInt i, j, N = mumps->id.size_schur; 2390 for (i = 0; i < N; i++) { 2391 for (j = i; j < N; j++) { 2392 #if !defined(PETSC_USE_COMPLEX) 2393 PetscScalar val = mumps->id.schur[i * N + j]; 2394 #else 2395 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2396 #endif 2397 array[i * N + j] = val; 2398 array[j * N + i] = val; 2399 } 2400 } 2401 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2402 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2403 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2404 PetscInt i, j, N = mumps->id.size_schur; 2405 for (i = 0; i < N; i++) { 2406 for (j = 0; j < i + 1; j++) { 2407 #if !defined(PETSC_USE_COMPLEX) 2408 PetscScalar val = mumps->id.schur[i * N + j]; 2409 #else 2410 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2411 #endif 2412 array[i * N + j] = val; 
2413 array[j * N + i] = val; 2414 } 2415 } 2416 } 2417 } 2418 PetscCall(MatDenseRestoreArray(St, &array)); 2419 *S = St; 2420 PetscFunctionReturn(PETSC_SUCCESS); 2421 } 2422 2423 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2424 { 2425 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2426 2427 PetscFunctionBegin; 2428 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2429 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2430 for (i = 0; i < nICNTL_pre; ++i) 2431 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2432 if (i == nICNTL_pre) { /* not already cached */ 2433 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2434 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2435 mumps->ICNTL_pre[0]++; 2436 } 2437 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2438 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2439 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2440 PetscFunctionReturn(PETSC_SUCCESS); 2441 } 2442 2443 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2444 { 2445 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2446 2447 PetscFunctionBegin; 2448 if (mumps->id.job == JOB_NULL) { 2449 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2450 *ival = 0; 2451 for (i = 0; i < nICNTL_pre; ++i) { 2452 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2453 } 2454 } else *ival = mumps->id.ICNTL(icntl); 2455 PetscFunctionReturn(PETSC_SUCCESS); 2456 } 2457 2458 /*@ 2459 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2460 2461 Logically Collective 2462 2463 Input Parameters: 2464 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2465 . icntl - index of MUMPS parameter array ICNTL() 2466 - ival - value of MUMPS ICNTL(icntl) 2467 2468 Options Database Key: 2469 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2470 2471 Level: beginner 2472 2473 References: 2474 . * - MUMPS Users' Guide 2475 2476 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2477 @*/ 2478 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2479 { 2480 PetscFunctionBegin; 2481 PetscValidType(F, 1); 2482 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2483 PetscValidLogicalCollectiveInt(F, icntl, 2); 2484 PetscValidLogicalCollectiveInt(F, ival, 3); 2485 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2486 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2487 PetscFunctionReturn(PETSC_SUCCESS); 2488 } 2489 2490 /*@ 2491 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2492 2493 Logically Collective 2494 2495 Input Parameters: 2496 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2497 - icntl - index of MUMPS parameter array ICNTL() 2498 2499 Output Parameter: 2500 . ival - value of MUMPS ICNTL(icntl) 2501 2502 Level: beginner 2503 2504 References: 2505 . 
* - MUMPS Users' Guide 2506 2507 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2508 @*/ 2509 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2510 { 2511 PetscFunctionBegin; 2512 PetscValidType(F, 1); 2513 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2514 PetscValidLogicalCollectiveInt(F, icntl, 2); 2515 PetscValidIntPointer(ival, 3); 2516 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2517 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2518 PetscFunctionReturn(PETSC_SUCCESS); 2519 } 2520 2521 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2522 { 2523 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2524 2525 PetscFunctionBegin; 2526 if (mumps->id.job == JOB_NULL) { 2527 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2528 for (i = 0; i < nCNTL_pre; ++i) 2529 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2530 if (i == nCNTL_pre) { 2531 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2532 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2533 mumps->CNTL_pre[0]++; 2534 } 2535 mumps->CNTL_pre[1 + 2 * i] = icntl; 2536 mumps->CNTL_pre[2 + 2 * i] = val; 2537 } else mumps->id.CNTL(icntl) = val; 2538 PetscFunctionReturn(PETSC_SUCCESS); 2539 } 2540 2541 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2542 { 2543 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2544 2545 PetscFunctionBegin; 2546 if (mumps->id.job == JOB_NULL) { 2547 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2548 *val = 0.0; 2549 for (i = 0; i < nCNTL_pre; ++i) { 2550 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2551 } 2552 } else *val = mumps->id.CNTL(icntl); 2553 PetscFunctionReturn(PETSC_SUCCESS); 2554 } 2555 2556 /*@ 2557 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2558 2559 Logically Collective 2560 2561 Input Parameters: 2562 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2563 . icntl - index of MUMPS parameter array CNTL() 2564 - val - value of MUMPS CNTL(icntl) 2565 2566 Options Database Key: 2567 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2568 2569 Level: beginner 2570 2571 References: 2572 . 
* - MUMPS Users' Guide 2573 2574 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2575 @*/ 2576 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2577 { 2578 PetscFunctionBegin; 2579 PetscValidType(F, 1); 2580 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2581 PetscValidLogicalCollectiveInt(F, icntl, 2); 2582 PetscValidLogicalCollectiveReal(F, val, 3); 2583 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2584 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2585 PetscFunctionReturn(PETSC_SUCCESS); 2586 } 2587 2588 /*@ 2589 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2590 2591 Logically Collective 2592 2593 Input Parameters: 2594 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2595 - icntl - index of MUMPS parameter array CNTL() 2596 2597 Output Parameter: 2598 . val - value of MUMPS CNTL(icntl) 2599 2600 Level: beginner 2601 2602 References: 2603 . * - MUMPS Users' Guide 2604 2605 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2606 @*/ 2607 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2608 { 2609 PetscFunctionBegin; 2610 PetscValidType(F, 1); 2611 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2612 PetscValidLogicalCollectiveInt(F, icntl, 2); 2613 PetscValidRealPointer(val, 3); 2614 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2615 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2616 PetscFunctionReturn(PETSC_SUCCESS); 2617 } 2618 2619 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2620 { 2621 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2622 2623 PetscFunctionBegin; 2624 *info = mumps->id.INFO(icntl); 2625 PetscFunctionReturn(PETSC_SUCCESS); 2626 } 2627 2628 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2629 { 2630 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2631 2632 PetscFunctionBegin; 2633 *infog = mumps->id.INFOG(icntl); 2634 PetscFunctionReturn(PETSC_SUCCESS); 2635 } 2636 2637 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2638 { 2639 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2640 2641 PetscFunctionBegin; 2642 *rinfo = mumps->id.RINFO(icntl); 2643 PetscFunctionReturn(PETSC_SUCCESS); 2644 } 2645 2646 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2647 { 2648 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2649 2650 PetscFunctionBegin; 2651 *rinfog = mumps->id.RINFOG(icntl); 2652 PetscFunctionReturn(PETSC_SUCCESS); 2653 } 2654 2655 PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2656 { 2657 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2658 2659 PetscFunctionBegin; 2660 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2661 *size = 0; 2662 *array = NULL; 2663 if 
(!mumps->myid) { 2664 *size = mumps->id.INFOG(28); 2665 PetscCall(PetscMalloc1(*size, array)); 2666 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2667 } 2668 PetscFunctionReturn(PETSC_SUCCESS); 2669 } 2670 2671 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2672 { 2673 Mat Bt = NULL, Btseq = NULL; 2674 PetscBool flg; 2675 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2676 PetscScalar *aa; 2677 PetscInt spnr, *ia, *ja, M, nrhs; 2678 2679 PetscFunctionBegin; 2680 PetscValidPointer(spRHS, 2); 2681 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2682 if (flg) { 2683 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2684 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2685 2686 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2687 2688 if (mumps->petsc_size > 1) { 2689 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2690 Btseq = b->A; 2691 } else { 2692 Btseq = Bt; 2693 } 2694 2695 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2696 mumps->id.nrhs = nrhs; 2697 mumps->id.lrhs = M; 2698 mumps->id.rhs = NULL; 2699 2700 if (!mumps->myid) { 2701 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2702 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2703 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2704 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2705 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2706 } else { 2707 mumps->id.irhs_ptr = NULL; 2708 mumps->id.irhs_sparse = NULL; 2709 mumps->id.nz_rhs = 0; 2710 mumps->id.rhs_sparse = NULL; 2711 } 2712 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2713 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2714 2715 /* solve phase */ 2716 mumps->id.job = JOB_SOLVE; 2717 PetscMUMPS_c(mumps); 2718 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 2719 2720 if (!mumps->myid) { 2721 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2722 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2723 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2724 } 2725 PetscFunctionReturn(PETSC_SUCCESS); 2726 } 2727 2728 /*@ 2729 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2730 2731 Logically Collective 2732 2733 Input Parameter: 2734 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2735 2736 Output Parameter: 2737 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2738 2739 Level: beginner 2740 2741 References: 2742 . 
* - MUMPS Users' Guide 2743 2744 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2745 @*/ 2746 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2747 { 2748 PetscFunctionBegin; 2749 PetscValidType(F, 1); 2750 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2751 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2752 PetscFunctionReturn(PETSC_SUCCESS); 2753 } 2754 2755 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2756 { 2757 Mat spRHS; 2758 2759 PetscFunctionBegin; 2760 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 2761 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 2762 PetscCall(MatDestroy(&spRHS)); 2763 PetscFunctionReturn(PETSC_SUCCESS); 2764 } 2765 2766 /*@ 2767 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 2768 2769 Logically Collective 2770 2771 Input Parameter: 2772 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2773 2774 Output Parameter: 2775 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 2776 2777 Level: beginner 2778 2779 References: 2780 . * - MUMPS Users' Guide 2781 2782 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2783 @*/ 2784 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 2785 { 2786 PetscBool flg; 2787 2788 PetscFunctionBegin; 2789 PetscValidType(F, 1); 2790 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2791 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 2792 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 2793 2794 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 2795 PetscFunctionReturn(PETSC_SUCCESS); 2796 } 2797 2798 /*@ 2799 MatMumpsGetInfo - Get MUMPS parameter INFO() 2800 2801 Logically Collective 2802 2803 Input Parameters: 2804 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2805 - icntl - index of MUMPS parameter array INFO() 2806 2807 Output Parameter: 2808 . ival - value of MUMPS INFO(icntl) 2809 2810 Level: beginner 2811 2812 References: 2813 . * - MUMPS Users' Guide 2814 2815 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2816 @*/ 2817 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 2818 { 2819 PetscFunctionBegin; 2820 PetscValidType(F, 1); 2821 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2822 PetscValidIntPointer(ival, 3); 2823 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2824 PetscFunctionReturn(PETSC_SUCCESS); 2825 } 2826 2827 /*@ 2828 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2829 2830 Logically Collective 2831 2832 Input Parameters: 2833 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2834 - icntl - index of MUMPS parameter array INFOG() 2835 2836 Output Parameter: 2837 . ival - value of MUMPS INFOG(icntl) 2838 2839 Level: beginner 2840 2841 References: 2842 . 
* - MUMPS Users' Guide 2843 2844 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2845 @*/ 2846 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 2847 { 2848 PetscFunctionBegin; 2849 PetscValidType(F, 1); 2850 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2851 PetscValidIntPointer(ival, 3); 2852 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2853 PetscFunctionReturn(PETSC_SUCCESS); 2854 } 2855 2856 /*@ 2857 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2858 2859 Logically Collective 2860 2861 Input Parameters: 2862 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2863 - icntl - index of MUMPS parameter array RINFO() 2864 2865 Output Parameter: 2866 . val - value of MUMPS RINFO(icntl) 2867 2868 Level: beginner 2869 2870 References: 2871 . * - MUMPS Users' Guide 2872 2873 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 2874 @*/ 2875 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 2876 { 2877 PetscFunctionBegin; 2878 PetscValidType(F, 1); 2879 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2880 PetscValidRealPointer(val, 3); 2881 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2882 PetscFunctionReturn(PETSC_SUCCESS); 2883 } 2884 2885 /*@ 2886 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2887 2888 Logically Collective 2889 2890 Input Parameters: 2891 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2892 - icntl - index of MUMPS parameter array RINFOG() 2893 2894 Output Parameter: 2895 . val - value of MUMPS RINFOG(icntl) 2896 2897 Level: beginner 2898 2899 References: 2900 . * - MUMPS Users' Guide 2901 2902 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 2903 @*/ 2904 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 2905 { 2906 PetscFunctionBegin; 2907 PetscValidType(F, 1); 2908 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2909 PetscValidRealPointer(val, 3); 2910 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2911 PetscFunctionReturn(PETSC_SUCCESS); 2912 } 2913 2914 /*@ 2915 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 2916 2917 Logically Collective 2918 2919 Input Parameter: 2920 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2921 2922 Output Parameters: 2923 + size - local size of the array. The size of the array is non-zero only on the host. 2924 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 2925 for freeing this array. 2926 2927 Level: beginner 2928 2929 References: 2930 . 
* - MUMPS Users' Guide
2931
2932 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2933 @*/
2934 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
2935 {
2936   PetscFunctionBegin;
2937   PetscValidType(F, 1);
2938   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2939   PetscValidIntPointer(size, 3);
2940   PetscValidPointer(array, 4);
2941   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
2942   PetscFunctionReturn(PETSC_SUCCESS);
2943 }
2944
2945 /*MC
2946   MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
2947   distributed and sequential matrices via the external package MUMPS.
2948
2949   Works with `MATAIJ` and `MATSBAIJ` matrices
2950
2951   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2952
2953   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
2954   See details below.
2955
2956   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
2957
2958   Options Database Keys:
2959 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2960 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
2961 .  -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
2962 .  -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
2963 .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales the matrix (0 to 7)
2964 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2965   Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2966 .  -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
2967 .  -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
2968 .  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2969 .  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2970 .  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2971 .  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
2972 .  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
2973 .  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
2974 .  -mat_mumps_icntl_20 - ICNTL(20): gives MUMPS a centralized (0) or distributed (10) dense RHS
2975 .  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2976 .  -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor
2977 .  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
2978 .  -mat_mumps_icntl_25 - ICNTL(25): computes a solution of a deficient matrix and a null space basis
2979 .  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase when a Schur complement matrix has been computed
2980 .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2981 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2982 .  -mat_mumps_icntl_30 - ICNTL(30): computes a user-specified set of entries in inv(A)
2983 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2984 .  -mat_mumps_icntl_33 - ICNTL(33): computes the determinant
2985 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2986 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2987 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2988 .  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
2989 .  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
2990 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2991 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2992 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2993 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2994 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) had been called before calling MUMPS.
2995   The default may be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
2996
2997   Level: beginner
2998
2999   Notes:
3000   MUMPS Cholesky does not handle (complex) Hermitian matrices (see the User's Guide at https://mumps-solver.org/index.php?page=doc), so using it
3001   will generate an error if the matrix is Hermitian.
3002
3003   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3004   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3005
3006   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3007   the failure with
3008 .vb
3009           KSPGetPC(ksp,&pc);
3010           PCFactorGetMatrix(pc,&mat);
3011           MatMumpsGetInfo(mat,....);
3012           MatMumpsGetInfog(mat,....); etc.
3013 .ve
3014   Or run with `-ksp_error_if_not_converged` and the program will stop and the information will be printed in the error message.
3015
3016   MUMPS provides 64-bit integer support in two build modes:
3017   full 64-bit: here MUMPS is built with the C preprocessing flag -DINTSIZE64 and the Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3018   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3019
3020   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3021   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of nonzeros, but must have fewer than 2^31 rows and
3022   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3023   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3024
3025   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3026
3027   Two modes to run MUMPS/PETSc with OpenMP
3028 .vb
3029    Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3030    threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3031 .ve
3032
3033 .vb
3034    -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
3035    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3036 .ve
3037
3038   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3039   (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3040   (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3041   libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3042   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
3043
3044   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3045   processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3046   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
3047   are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (as might be set
3048   by CPU binding policies in job scripts) and make those CPUs available to the master so that the OpenMP threads spawned by MUMPS can run on them.
3049   In a multi-socket compute node, MPI rank mapping is an issue. Using the example above, suppose your compute node has two sockets.
3050   If you interleave MPI ranks on the two sockets, in other words even ranks are placed on socket 0 and odd ranks on socket 1, and bind
3051   MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores on socket 0 and half of the
3052   cores on socket 1, which hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, the
3053   problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
3054   For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
3055   examine the mapping result.
3056
3057   PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3058   for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3059   calls `omp_set_num_threads`(m) internally before calling MUMPS.
3060
3061   References:
3062 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3063 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
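
   For illustration, MUMPS can also be selected and tuned from code rather than from the options database. In the sketch below the `ksp`, `pc`, `F`, `b`, `x`
   and `ival` variables are assumed to be created elsewhere, and the particular ICNTL/CNTL values are placeholders, not recommendations: it enables null
   pivot detection with ICNTL(24), sets the absolute pivoting threshold CNTL(3), and retrieves the global MUMPS return code INFOG(1) after the solve
.vb
     KSPGetPC(ksp, &pc);
     PCSetType(pc, PCLU);
     PCFactorSetMatSolverType(pc, MATSOLVERMUMPS);
     PCFactorSetUpMatSolverType(pc);
     PCFactorGetMatrix(pc, &F);
     MatMumpsSetIcntl(F, 24, 1);
     MatMumpsSetCntl(F, 3, 1e-12);
     KSPSolve(ksp, b, x);
     MatMumpsGetInfog(F, 1, &ival);
.ve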
3064 3065 .seealso: [](chapter_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3066 M*/ 3067 3068 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3069 { 3070 PetscFunctionBegin; 3071 *type = MATSOLVERMUMPS; 3072 PetscFunctionReturn(PETSC_SUCCESS); 3073 } 3074 3075 /* MatGetFactor for Seq and MPI AIJ matrices */ 3076 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3077 { 3078 Mat B; 3079 Mat_MUMPS *mumps; 3080 PetscBool isSeqAIJ; 3081 PetscMPIInt size; 3082 3083 PetscFunctionBegin; 3084 #if defined(PETSC_USE_COMPLEX) 3085 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3086 #endif 3087 /* Create the factorization matrix */ 3088 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3089 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3090 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3091 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3092 PetscCall(MatSetUp(B)); 3093 3094 PetscCall(PetscNew(&mumps)); 3095 3096 B->ops->view = MatView_MUMPS; 3097 B->ops->getinfo = MatGetInfo_MUMPS; 3098 3099 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3100 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3101 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3102 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3103 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3104 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3105 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3106 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3107 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3108 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3109 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3110 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3111 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3112 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3113 3114 if (ftype == MAT_FACTOR_LU) { 3115 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3116 B->factortype = MAT_FACTOR_LU; 3117 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3118 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3119 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3120 mumps->sym = 0; 3121 } else { 3122 B->ops->choleskyfactorsymbolic = 
MatCholeskyFactorSymbolic_MUMPS; 3123 B->factortype = MAT_FACTOR_CHOLESKY; 3124 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3125 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3126 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3127 #if defined(PETSC_USE_COMPLEX) 3128 mumps->sym = 2; 3129 #else 3130 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3131 else mumps->sym = 2; 3132 #endif 3133 } 3134 3135 /* set solvertype */ 3136 PetscCall(PetscFree(B->solvertype)); 3137 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3138 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3139 if (size == 1) { 3140 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3141 B->canuseordering = PETSC_TRUE; 3142 } 3143 B->ops->destroy = MatDestroy_MUMPS; 3144 B->data = (void *)mumps; 3145 3146 *F = B; 3147 mumps->id.job = JOB_NULL; 3148 mumps->ICNTL_pre = NULL; 3149 mumps->CNTL_pre = NULL; 3150 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3151 PetscFunctionReturn(PETSC_SUCCESS); 3152 } 3153 3154 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3155 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3156 { 3157 Mat B; 3158 Mat_MUMPS *mumps; 3159 PetscBool isSeqSBAIJ; 3160 PetscMPIInt size; 3161 3162 PetscFunctionBegin; 3163 #if defined(PETSC_USE_COMPLEX) 3164 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3165 #endif 3166 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3167 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3168 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3169 PetscCall(MatSetUp(B)); 3170 3171 PetscCall(PetscNew(&mumps)); 3172 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3173 if (isSeqSBAIJ) { 3174 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3175 } else { 3176 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3177 } 3178 3179 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3180 B->ops->view = MatView_MUMPS; 3181 B->ops->getinfo = MatGetInfo_MUMPS; 3182 3183 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3184 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3185 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3186 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3187 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3188 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3189 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3190 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3191 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3192 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3193 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", 
MatMumpsGetRinfog_MUMPS)); 3194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3195 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3196 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3197 3198 B->factortype = MAT_FACTOR_CHOLESKY; 3199 #if defined(PETSC_USE_COMPLEX) 3200 mumps->sym = 2; 3201 #else 3202 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3203 else mumps->sym = 2; 3204 #endif 3205 3206 /* set solvertype */ 3207 PetscCall(PetscFree(B->solvertype)); 3208 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3209 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3210 if (size == 1) { 3211 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3212 B->canuseordering = PETSC_TRUE; 3213 } 3214 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3215 B->ops->destroy = MatDestroy_MUMPS; 3216 B->data = (void *)mumps; 3217 3218 *F = B; 3219 mumps->id.job = JOB_NULL; 3220 mumps->ICNTL_pre = NULL; 3221 mumps->CNTL_pre = NULL; 3222 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3223 PetscFunctionReturn(PETSC_SUCCESS); 3224 } 3225 3226 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3227 { 3228 Mat B; 3229 Mat_MUMPS *mumps; 3230 PetscBool isSeqBAIJ; 3231 PetscMPIInt size; 3232 3233 PetscFunctionBegin; 3234 /* Create the factorization matrix */ 3235 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3236 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3237 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3238 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3239 PetscCall(MatSetUp(B)); 3240 3241 PetscCall(PetscNew(&mumps)); 3242 if (ftype == MAT_FACTOR_LU) { 3243 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3244 B->factortype = MAT_FACTOR_LU; 3245 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3246 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3247 mumps->sym = 0; 3248 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3249 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3250 3251 B->ops->view = MatView_MUMPS; 3252 B->ops->getinfo = MatGetInfo_MUMPS; 3253 3254 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3255 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3256 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3257 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3258 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3259 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3260 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3261 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3262 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3263 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3264 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3265 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3266 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3267 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3268 3269 /* set solvertype */ 3270 PetscCall(PetscFree(B->solvertype)); 3271 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3272 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3273 if (size == 1) { 3274 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3275 B->canuseordering = PETSC_TRUE; 3276 } 3277 B->ops->destroy = MatDestroy_MUMPS; 3278 B->data = (void *)mumps; 3279 3280 *F = B; 3281 mumps->id.job = JOB_NULL; 3282 mumps->ICNTL_pre = NULL; 3283 mumps->CNTL_pre = NULL; 3284 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3285 PetscFunctionReturn(PETSC_SUCCESS); 3286 } 3287 3288 /* MatGetFactor for Seq and MPI SELL matrices */ 3289 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3290 { 3291 Mat B; 3292 Mat_MUMPS *mumps; 3293 PetscBool isSeqSELL; 3294 PetscMPIInt size; 3295 3296 PetscFunctionBegin; 3297 /* Create the factorization matrix */ 3298 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3299 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3300 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3301 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3302 PetscCall(MatSetUp(B)); 3303 3304 PetscCall(PetscNew(&mumps)); 3305 3306 B->ops->view = MatView_MUMPS; 3307 B->ops->getinfo = MatGetInfo_MUMPS; 3308 3309 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3310 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3311 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3312 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3313 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3314 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3315 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3316 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3317 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3318 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3319 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3320 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3321 3322 if (ftype == MAT_FACTOR_LU) { 3323 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3324 B->factortype = 
MAT_FACTOR_LU; 3325 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3326 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3327 mumps->sym = 0; 3328 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3329 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3330 3331 /* set solvertype */ 3332 PetscCall(PetscFree(B->solvertype)); 3333 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3334 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3335 if (size == 1) { 3336 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3337 B->canuseordering = PETSC_TRUE; 3338 } 3339 B->ops->destroy = MatDestroy_MUMPS; 3340 B->data = (void *)mumps; 3341 3342 *F = B; 3343 mumps->id.job = JOB_NULL; 3344 mumps->ICNTL_pre = NULL; 3345 mumps->CNTL_pre = NULL; 3346 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3347 PetscFunctionReturn(PETSC_SUCCESS); 3348 } 3349 3350 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3351 { 3352 PetscFunctionBegin; 3353 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3354 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3355 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3356 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3357 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3358 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3359 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3360 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3361 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3362 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3363 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3364 PetscFunctionReturn(PETSC_SUCCESS); 3365 } 3366
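
/*
   A minimal sketch of the direct (non-KSP) usage enabled by the registrations above. The matrix A and the vectors b and x are assumed to be
   created elsewhere, and the chosen ICNTL value is illustrative only: ICNTL(14) enlarges the estimated working space by 30 percent.

     Mat           F;
     IS            rowperm, colperm;
     MatFactorInfo info;

     MatGetOrdering(A, MATORDERINGEXTERNAL, &rowperm, &colperm);   let MUMPS compute its own ordering
     MatFactorInfoInitialize(&info);
     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
     MatMumpsSetIcntl(F, 14, 30);
     MatLUFactorSymbolic(F, A, rowperm, colperm, &info);
     MatLUFactorNumeric(F, A, &info);
     MatSolve(F, b, x);
*/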