/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command-line macro one may use to build MUMPS in full 64-bit mode */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;
  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks.
   Before/after the call, we change/restore CPUs the master ranks can run on */
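/* (This code path is typically enabled at runtime with the option -mat_mumps_use_omp_threads [m],
   assuming a PETSc build configured with OpenMP support; see the MATSOLVERMUMPS documentation
   for the exact requirements.) */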
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
        MUMPS_c(&mumps->id); \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
      } \
      PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { \
    MUMPS_c(&mumps->id); \
  } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]

typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller with which blocked processes release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
  PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master;        /* is this rank the master of omp_comm? */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] is the column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

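    /* X now holds the action of the stored inverse of the Schur complement on the reduced rhs;
       copy it back into B, which wraps mumps->id.redrhs, so the expansion phase can use it */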
    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
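
  As a small illustrative sketch (not tied to any particular routine below), the 2x2 AIJ matrix
      [10 20]
      [ 0 30]
  would be converted with shift = 1 (Fortran-style indices, which MUMPS expects) to
      r = {1, 1, 2}, c = {1, 2, 2}, v = {10, 20, 30}
  and with shift = 0 (C-style indices) to
      r = {0, 0, 1}, c = {0, 1, 1}, v = {10, 20, 30}.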
*/

PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  mumps->val = (PetscScalar *)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscInt64     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = a->sliidx[a->totalslices];
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  M          = A->rmap->N / bs;
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai = aa->i;
    aj = aa->j;
    nz = bs2 * aa->nz;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscInt64      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool isset, hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscInt64 alloc_size = aa->nz * bs2;

    PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
    if (bs > 1) {
      PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs > 1) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (ajj[j] > i || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
              }
              val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j;
  const PetscScalar *av, *v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
  PetscBool          missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i + 1] - adiag[i];
        }
      }
    } else {
      for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) val[nz++] = v1[j];
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) val[nz++] = v1[j];
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
  PetscInt           bs;
  PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A, &bs));
  rstart = A->rmap->rstart;
  ai     = aa->i;
  aj     = aa->j;
  bi     = bb->i;
  bj     = bb->j;
  av     = aa->a;
  bv     = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    /* can not decide the exact mumps->nnz now because of the SBAIJ */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i] * bs2;
    v2     = bv + bi[i] * bs2;

    if (bs > 1) {
      /* A-part */
      for (j = 0; j < countA; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (rstart + ajj[j] * bs > irow || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs];
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
            }
            val[jj++] = v2[j * bs2 + m + k * bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j = 0; j < countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  mumps->nnz = jj;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscInt64         rstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)(Ad)->data;
  bb = (Mat_SeqAIJ *)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa    = (Mat_SeqAIJ *)(Ad)->data;
  bb    = (Mat_SeqAIJ *)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i = 0; i < m; i++) {
      nza += (ai[i + 1] - adiag[i]);
      countB = bi[i + 1] - bi[i];
      bjj    = bj + bi[i];
      for (j = 0; j < countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of upper triangular part of mat */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i + 1] - adiag[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1025 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1026 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1027 PetscFunctionReturn(PETSC_SUCCESS); 1028 } 1029 1030 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1031 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1032 { 1033 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1034 const PetscMPIInt ompsize = mumps->omp_comm_size; 1035 PetscInt i, m, M, rstart; 1036 1037 PetscFunctionBegin; 1038 PetscCall(MatGetSize(A, &M, NULL)); 1039 PetscCall(MatGetLocalSize(A, &m, NULL)); 1040 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1041 if (ompsize == 1) { 1042 if (!mumps->irhs_loc) { 1043 mumps->nloc_rhs = m; 1044 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1045 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1046 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1047 } 1048 mumps->id.rhs_loc = (MumpsScalar *)array; 1049 } else { 1050 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1051 const PetscInt *ranges; 1052 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1053 MPI_Group petsc_group, omp_group; 1054 PetscScalar *recvbuf = NULL; 1055 1056 if (mumps->is_omp_master) { 1057 /* Lazily initialize the omp stuff for distributed rhs */ 1058 if (!mumps->irhs_loc) { 1059 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1060 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1061 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1062 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1063 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1064 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1065 1066 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1067 mumps->nloc_rhs = 0; 1068 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1069 for (j = 0; j < ompsize; j++) { 1070 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1071 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1072 } 1073 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1074 for (j = k = 0; j < ompsize; j++) { 1075 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1076 } 1077 1078 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1079 PetscCallMPI(MPI_Group_free(&petsc_group)); 1080 PetscCallMPI(MPI_Group_free(&omp_group)); 1081 } 1082 1083 /* Realloc buffers when current nrhs is bigger than what we have met */ 1084 if (nrhs > mumps->max_nrhs) { 1085 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1086 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1087 mumps->max_nrhs = nrhs; 1088 } 1089 1090 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1091 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1092 mumps->rhs_disps[0] = 0; 1093 for (j = 1; j < ompsize; j++) { 1094 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1095 PetscCheck(mumps->rhs_disps[j] >= 
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
                                   "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
                                   &cite2));

  if (A->factorerrortype) {
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscCall(VecSetInf(x));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b, &rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq, &array));
        mumps->id.rhs = (MumpsScalar *)array;
      }
    }
  } else { /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    PetscCall(VecCopy(b, x));
    PetscCall(VecGetArray(x, &array));
    mumps->id.rhs = (MumpsScalar *)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    second_solve = PETSC_TRUE;
    PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }
  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size lsol_loc), so recreate scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
  }

  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      PetscCall(VecRestoreArrayRead(b, &rarray));
    } else if (!mumps->myid) {
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else PetscCall(VecRestoreArray(x, &array));

  PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatSolve_MUMPS(A, b, x));
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL;
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save;
  PetscScalar       *bray, *sol_loc, *sol_loc_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc;
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
    if (flgT) { /* input B is transpose of actual RHS matrix,
                   because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      PetscCall(MatTransposeGetMat(B, &Bt));
    } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B, &M, &nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt     spnr, *ia, *ja;
    PetscBool    second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X, &array));
    mumps->id.rhs = (MumpsScalar *)array;

    if (denseB) {
      /* copy B to X */
      PetscCall(MatDenseGetArrayRead(B, &rbray));
      PetscCall(PetscArraycpy(array, rbray, M * nrhs));
      PetscCall(MatDenseRestoreArrayRead(B, &rbray));
    } else { /* sparse B */
      PetscCall(MatSeqAIJGetArray(Bt, &aa));
      PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar *)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
    }
    /* solve phase */
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
    if (!denseB) { /* sparse B */
      PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
      PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
    PetscCall(MatDenseRestoreArray(X, &array));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* parallel case: MUMPS requires rhs B to be centralized on the host! */
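  /* (When mumps->ICNTL20 == 10 the dense rhs is instead passed to MUMPS in distributed form in the
     denseB branch below; the b_seq/scat_rhs centralization is only used for the centralized-rhs path.) */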
  PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
  mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B, &rbray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
      PetscCall(MatDenseRestoreArrayRead(B, &rbray));
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
         0, re-arrange B into desired order, which is a local operation.
       */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(MatDenseGetArray(B, &bray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
      PetscCall(MatDenseRestoreArray(B, &bray));

      /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhs * M, &idx));
        PetscCall(MatGetOwnershipRanges(B, &rstart));
        k = 0;
        for (proc = 0; proc < mumps->petsc_size; proc++) {
          for (j = 0; j < nrhs; j++) {
            for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
      } else {
        PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
      }
      PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

      if (!mumps->myid) { /* define rhs on the host */
        PetscCall(VecGetArray(b_seq, &bray));
        mumps->id.rhs = (MumpsScalar *)bray;
        PetscCall(VecRestoreArray(b_seq, &bray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ *)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X, &m, NULL));
    PetscCall(MatDenseGetArray(X, &bray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
    PetscCall(MatDenseRestoreArray(X, &bray));

    if (!mumps->myid) {
      PetscCall(MatSeqAIJGetArray(b->A, &aa));
      PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar *)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  PetscCall(MatDenseGetArray(X, &array));
  PetscCall(VecPlaceArray(v_mpi, array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X, &rstart));
  /* iidx: index for scatter mumps solution to petsc X */

  PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
  PetscCall(PetscMalloc1(nlsol_loc, &idxx));
  for (i = 0; i < lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
    for (proc = 0; proc < mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
  PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
  PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X, &array));

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  PetscCall(PetscFree2(sol_loc, isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ *)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
      PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
{
  Mat_MUMPS    *mumps    = (Mat_MUMPS *)A->data;
  PetscMUMPSInt oldvalue = mumps->id.ICNTL(9);

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatMatSolve_MUMPS(A, B, X));
  mumps->id.ICNTL(9) = oldvalue;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
{
  PetscBool flg;
  Mat       B;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  PetscCall(MatCreateTranspose(Bt, &B));

  PetscCall(MatMatSolve_MUMPS(A, B, X));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
{
  Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility
     to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1527 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1528 1529 if (nneg) *nneg = mumps->id.INFOG(12); 1530 if (nzero || npos) { 1531 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1532 if (nzero) *nzero = mumps->id.INFOG(28); 1533 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1534 } 1535 PetscFunctionReturn(PETSC_SUCCESS); 1536 } 1537 #endif 1538 1539 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1540 { 1541 PetscInt i, nreqs; 1542 PetscMUMPSInt *irn, *jcn; 1543 PetscMPIInt count; 1544 PetscInt64 totnnz, remain; 1545 const PetscInt osize = mumps->omp_comm_size; 1546 PetscScalar *val; 1547 1548 PetscFunctionBegin; 1549 if (osize > 1) { 1550 if (reuse == MAT_INITIAL_MATRIX) { 1551 /* master first gathers counts of nonzeros to receive */ 1552 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1553 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1554 1555 /* Then each computes number of send/recvs */ 1556 if (mumps->is_omp_master) { 1557 /* Start from 1 since self communication is not done in MPI */ 1558 nreqs = 0; 1559 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1560 } else { 1561 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1562 } 1563 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1564 1565 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1566 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1567 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1568 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
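       As a concrete (purely illustrative) example: if a worker rank holds nnz = 5e9 triplet entries,
       then with PETSC_MPI_INT_MAX = 2^31-1 = 2147483647 the estimate above gives
       nreqs = (nnz + 2^31-2)/(2^31-1) = 3 slices, so that worker posts 3 sends each for irn, jcn and
       val (9 MPI_Isend in total) and the omp master posts the matching 9 MPI_Irecv.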
1569 */ 1570 nreqs = 0; /* counter for actual send/recvs */ 1571 if (mumps->is_omp_master) { 1572 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1573 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1574 PetscCall(PetscMalloc1(totnnz, &val)); 1575 1576 /* Self communication */ 1577 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1578 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1579 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1580 1581 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1582 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1583 PetscCall(PetscFree(mumps->val_alloc)); 1584 mumps->nnz = totnnz; 1585 mumps->irn = irn; 1586 mumps->jcn = jcn; 1587 mumps->val = mumps->val_alloc = val; 1588 1589 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1590 jcn += mumps->recvcount[0]; 1591 val += mumps->recvcount[0]; 1592 1593 /* Remote communication */ 1594 for (i = 1; i < osize; i++) { 1595 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1596 remain = mumps->recvcount[i] - count; 1597 while (count > 0) { 1598 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1599 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1600 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1601 irn += count; 1602 jcn += count; 1603 val += count; 1604 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1605 remain -= count; 1606 } 1607 } 1608 } else { 1609 irn = mumps->irn; 1610 jcn = mumps->jcn; 1611 val = mumps->val; 1612 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1613 remain = mumps->nnz - count; 1614 while (count > 0) { 1615 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1616 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1617 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1618 irn += count; 1619 jcn += count; 1620 val += count; 1621 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1622 remain -= count; 1623 } 1624 } 1625 } else { 1626 nreqs = 0; 1627 if (mumps->is_omp_master) { 1628 val = mumps->val + mumps->recvcount[0]; 1629 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1630 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1631 remain = mumps->recvcount[i] - count; 1632 while (count > 0) { 1633 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1634 val += count; 1635 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1636 remain -= count; 1637 } 1638 } 1639 } else { 1640 val = mumps->val; 1641 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1642 remain = mumps->nnz - count; 1643 while (count > 0) { 1644 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1645 val += count; 1646 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1647 remain -= count; 1648 } 1649 } 1650 } 1651 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1652 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1653 } 1654 PetscFunctionReturn(PETSC_SUCCESS); 1655 } 1656 1657 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const 
MatFactorInfo *info)
1658 {
1659   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1660   PetscBool  isMPIAIJ;
1661
1662   PetscFunctionBegin;
1663   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1)=-16 with INFO(2)=0 means an empty matrix, which is not an error */
1664     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1665     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1666     PetscFunctionReturn(PETSC_SUCCESS);
1667   }
1668
1669   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1670   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1671
1672   /* numerical factorization phase */
1673   mumps->id.job = JOB_FACTNUMERIC;
1674   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1675     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1676   } else {
1677     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1678   }
1679   PetscMUMPS_c(mumps);
1680   if (mumps->id.INFOG(1) < 0) {
1681     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1682     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1683       PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1684       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1685     } else if (mumps->id.INFOG(1) == -13) {
1686       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1687       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1688     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1689       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1690       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1691     } else {
1692       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1693       F->factorerrortype = MAT_FACTOR_OTHER;
1694     }
1695   }
1696   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "mumps->id.ICNTL(16)=%d", mumps->id.ICNTL(16));
1697
1698   F->assembled = PETSC_TRUE;
1699
1700   if (F->schur) { /* reset Schur status to unfactored */
1701 #if defined(PETSC_HAVE_CUDA)
1702     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1703 #endif
1704     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1705       mumps->id.ICNTL(19) = 2;
1706       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1707     }
1708     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1709   }
1710
1711   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl() is always consistent */
1712   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1713
1714   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1715   if (mumps->petsc_size > 1) {
1716     PetscInt     lsol_loc;
1717     PetscScalar *sol_loc;
1718
1719     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1720
1721     /* distributed solution; Create x_seq=sol_loc for repeated use */
1722     if (mumps->x_seq) {
1723
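       /* a previous numeric factorization already created the local solution buffer and scatter;
          free them here because lsol_loc = INFO(23) may differ for the new factorization and the
          buffers are re-allocated just below */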
PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1724 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1725 PetscCall(VecDestroy(&mumps->x_seq)); 1726 } 1727 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1728 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1729 mumps->id.lsol_loc = lsol_loc; 1730 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1731 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1732 } 1733 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1734 PetscFunctionReturn(PETSC_SUCCESS); 1735 } 1736 1737 /* Sets MUMPS options from the options database */ 1738 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1739 { 1740 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1741 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1742 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1743 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1744 MumpsScalar *arr; 1745 1746 PetscFunctionBegin; 1747 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1748 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1749 PetscInt nthreads = 0; 1750 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1751 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1752 1753 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1754 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1755 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1756 1757 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1758 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1759 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1760 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1761 if (mumps->use_petsc_omp_support) { 1762 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 1763 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1764 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1765 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1766 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1767 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1768 #endif 1769 } else { 1770 mumps->omp_comm = PETSC_COMM_SELF; 1771 mumps->mumps_comm = mumps->petsc_comm; 1772 mumps->is_omp_master = PETSC_TRUE; 1773 } 1774 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1775 mumps->reqs = NULL; 1776 mumps->tag = 0; 1777 1778 if (mumps->mumps_comm != MPI_COMM_NULL) { 1779 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1780 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
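       In the non-OpenMP branch below, PetscCommGetComm() serves the same purpose: MUMPS is handed a
       PETSc-managed inner communicator distinct from petsc_comm (which PETSc can recycle later), so
       MUMPS messages cannot collide with PETSc's own traffic on the user's communicator.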
*/ 1781 MPI_Comm comm; 1782 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 1783 mumps->mumps_comm = comm; 1784 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 1785 } 1786 1787 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1788 mumps->id.job = JOB_INIT; 1789 mumps->id.par = 1; /* host participates factorizaton and solve */ 1790 mumps->id.sym = mumps->sym; 1791 1792 size = mumps->id.size_schur; 1793 arr = mumps->id.schur; 1794 listvar_schur = mumps->id.listvar_schur; 1795 PetscMUMPS_c(mumps); 1796 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1797 /* restore cached ICNTL and CNTL values */ 1798 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 1799 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 1800 PetscCall(PetscFree(mumps->ICNTL_pre)); 1801 PetscCall(PetscFree(mumps->CNTL_pre)); 1802 1803 if (schur) { 1804 mumps->id.size_schur = size; 1805 mumps->id.schur_lld = size; 1806 mumps->id.schur = arr; 1807 mumps->id.listvar_schur = listvar_schur; 1808 if (mumps->petsc_size > 1) { 1809 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 1810 1811 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1812 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1813 PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 1814 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 1815 } else { 1816 if (F->factortype == MAT_FACTOR_LU) { 1817 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1818 } else { 1819 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1820 } 1821 } 1822 mumps->id.ICNTL(26) = -1; 1823 } 1824 1825 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1826 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
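       Note this broadcast happens only once, in this first-call block right after JOB_INIT, so it
       propagates the values present right after initialization; user overrides given on the command
       line (e.g. -mat_mumps_icntl_14 50) are applied afterwards by the options processing below,
       which every rank executes.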
1827 */ 1828 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 1829 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 1830 1831 mumps->scat_rhs = NULL; 1832 mumps->scat_sol = NULL; 1833 1834 /* set PETSc-MUMPS default options - override MUMPS default */ 1835 mumps->id.ICNTL(3) = 0; 1836 mumps->id.ICNTL(4) = 0; 1837 if (mumps->petsc_size == 1) { 1838 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1839 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1840 } else { 1841 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1842 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1843 } 1844 } 1845 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 1846 if (flg) mumps->id.ICNTL(1) = icntl; 1847 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 1848 if (flg) mumps->id.ICNTL(2) = icntl; 1849 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 1850 if (flg) mumps->id.ICNTL(3) = icntl; 1851 1852 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 1853 if (flg) mumps->id.ICNTL(4) = icntl; 1854 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1855 1856 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 1857 if (flg) mumps->id.ICNTL(6) = icntl; 1858 1859 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 1860 if (flg) { 1861 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1862 mumps->id.ICNTL(7) = icntl; 1863 } 1864 1865 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 1866 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 1867 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 1868 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 1869 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 1870 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 1871 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 1872 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 1873 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 1874 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 1875 if (flg) { 1876 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 1877 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 1878 } 1879 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 1880 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1881 PetscCall(MatDestroy(&F->schur)); 1882 PetscCall(MatMumpsResetSchur_Private(mumps)); 1883 } 1884 1885 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1886 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1887 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1888 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1889 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1890 In short, we could not use distributed RHS with MPICH until v4.0b1. 
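       The preprocessor test below encodes exactly that: the default becomes ICNTL(20)=0 (centralized
       dense RHS) when MUMPS is older than 5.3.0 or an affected MPICH is detected
       (PETSC_HAVE_MPICH_NUMVERSION < 40000101, i.e. older than the v4.0b1 pre-release mentioned
       above), and ICNTL(20)=10 (distributed dense RHS) otherwise; either default can still be
       overridden with -mat_mumps_icntl_20.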
1891 */ 1892 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 1893 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 1894 #else 1895 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 1896 #endif 1897 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 1898 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 1899 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 1900 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 1901 #endif 1902 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 1903 1904 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 1905 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 1906 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 1907 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 1908 1909 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 1910 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 1911 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 1912 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 1913 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 1914 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 1915 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 1916 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 1917 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", 
mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
1918   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
1919   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
1920   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
1921
1922   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
1923   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
1924   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
1925   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
1926   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
1927   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
1928
1929   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1930
1931   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
1932   if (ninfo) {
1933     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
1934     PetscCall(PetscMalloc1(ninfo, &mumps->info));
1935     mumps->ninfo = ninfo;
1936     for (i = 0; i < ninfo; i++) {
1937       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
1938       mumps->info[i] = info[i];
1939     }
1940   }
1941   PetscOptionsEnd();
1942   PetscFunctionReturn(PETSC_SUCCESS);
1943 }
1944
1945 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
1946 {
1947   PetscFunctionBegin;
1948   if (mumps->id.INFOG(1) < 0) {
1949     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
1950     if (mumps->id.INFOG(1) == -6) {
1951       PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1952       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1953     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1954       PetscCall(PetscInfo(F, "problem with workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1955       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1956     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFOG(1)=-16 with INFO(2)=0 indicates an empty matrix */
1957       PetscCall(PetscInfo(F, "Empty matrix\n"));
1958     } else {
1959       PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1960       F->factorerrortype = MAT_FACTOR_OTHER;
1961     }
1962   }
1963   PetscFunctionReturn(PETSC_SUCCESS);
1964 }
1965
1966 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS
r, IS c, const MatFactorInfo *info) 1967 { 1968 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1969 Vec b; 1970 const PetscInt M = A->rmap->N; 1971 1972 PetscFunctionBegin; 1973 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1974 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1975 PetscFunctionReturn(PETSC_SUCCESS); 1976 } 1977 1978 /* Set MUMPS options from the options database */ 1979 PetscCall(MatSetFromOptions_MUMPS(F, A)); 1980 1981 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 1982 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 1983 1984 /* analysis phase */ 1985 mumps->id.job = JOB_FACTSYMBOLIC; 1986 mumps->id.n = M; 1987 switch (mumps->id.ICNTL(18)) { 1988 case 0: /* centralized assembled matrix input */ 1989 if (!mumps->myid) { 1990 mumps->id.nnz = mumps->nnz; 1991 mumps->id.irn = mumps->irn; 1992 mumps->id.jcn = mumps->jcn; 1993 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 1994 if (r) { 1995 mumps->id.ICNTL(7) = 1; 1996 if (!mumps->myid) { 1997 const PetscInt *idx; 1998 PetscInt i; 1999 2000 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2001 PetscCall(ISGetIndices(r, &idx)); 2002 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2003 PetscCall(ISRestoreIndices(r, &idx)); 2004 } 2005 } 2006 } 2007 break; 2008 case 3: /* distributed assembled matrix input (size>1) */ 2009 mumps->id.nnz_loc = mumps->nnz; 2010 mumps->id.irn_loc = mumps->irn; 2011 mumps->id.jcn_loc = mumps->jcn; 2012 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2013 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2014 PetscCall(MatCreateVecs(A, NULL, &b)); 2015 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2016 PetscCall(VecDestroy(&b)); 2017 } 2018 break; 2019 } 2020 PetscMUMPS_c(mumps); 2021 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2022 2023 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2024 F->ops->solve = MatSolve_MUMPS; 2025 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2026 F->ops->matsolve = MatMatSolve_MUMPS; 2027 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2028 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2029 2030 mumps->matstruc = SAME_NONZERO_PATTERN; 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 /* Note the Petsc r and c permutations are ignored */ 2035 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2036 { 2037 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2038 Vec b; 2039 const PetscInt M = A->rmap->N; 2040 2041 PetscFunctionBegin; 2042 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2043 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2044 PetscFunctionReturn(PETSC_SUCCESS); 2045 } 2046 2047 /* Set MUMPS options from the options database */ 2048 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2049 2050 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2051 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2052 2053 /* analysis phase */ 2054 mumps->id.job = JOB_FACTSYMBOLIC; 2055 mumps->id.n = M; 2056 switch (mumps->id.ICNTL(18)) { 2057 case 0: /* centralized assembled matrix input */ 2058 if (!mumps->myid) { 2059 mumps->id.nnz = mumps->nnz; 2060 mumps->id.irn = mumps->irn; 2061 mumps->id.jcn = mumps->jcn; 2062 if (mumps->id.ICNTL(6) > 1) mumps->id.a 
= (MumpsScalar *)mumps->val; 2063 } 2064 break; 2065 case 3: /* distributed assembled matrix input (size>1) */ 2066 mumps->id.nnz_loc = mumps->nnz; 2067 mumps->id.irn_loc = mumps->irn; 2068 mumps->id.jcn_loc = mumps->jcn; 2069 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2070 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2071 PetscCall(MatCreateVecs(A, NULL, &b)); 2072 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2073 PetscCall(VecDestroy(&b)); 2074 } 2075 break; 2076 } 2077 PetscMUMPS_c(mumps); 2078 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2079 2080 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2081 F->ops->solve = MatSolve_MUMPS; 2082 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2083 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2084 2085 mumps->matstruc = SAME_NONZERO_PATTERN; 2086 PetscFunctionReturn(PETSC_SUCCESS); 2087 } 2088 2089 /* Note the Petsc r permutation and factor info are ignored */ 2090 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2091 { 2092 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2093 Vec b; 2094 const PetscInt M = A->rmap->N; 2095 2096 PetscFunctionBegin; 2097 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2098 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2099 PetscFunctionReturn(PETSC_SUCCESS); 2100 } 2101 2102 /* Set MUMPS options from the options database */ 2103 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2104 2105 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2106 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2107 2108 /* analysis phase */ 2109 mumps->id.job = JOB_FACTSYMBOLIC; 2110 mumps->id.n = M; 2111 switch (mumps->id.ICNTL(18)) { 2112 case 0: /* centralized assembled matrix input */ 2113 if (!mumps->myid) { 2114 mumps->id.nnz = mumps->nnz; 2115 mumps->id.irn = mumps->irn; 2116 mumps->id.jcn = mumps->jcn; 2117 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2118 } 2119 break; 2120 case 3: /* distributed assembled matrix input (size>1) */ 2121 mumps->id.nnz_loc = mumps->nnz; 2122 mumps->id.irn_loc = mumps->irn; 2123 mumps->id.jcn_loc = mumps->jcn; 2124 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2125 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
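With ICNTL(20)=0 MUMPS reads the dense right-hand side only on the host, so the distributed PETSc vector must be gathered onto rank 0 before every solve.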
Create scatter scat_rhs for repeated use in MatSolve() */ 2126 PetscCall(MatCreateVecs(A, NULL, &b)); 2127 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2128 PetscCall(VecDestroy(&b)); 2129 } 2130 break; 2131 } 2132 PetscMUMPS_c(mumps); 2133 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2134 2135 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2136 F->ops->solve = MatSolve_MUMPS; 2137 F->ops->solvetranspose = MatSolve_MUMPS; 2138 F->ops->matsolve = MatMatSolve_MUMPS; 2139 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2140 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2141 #if defined(PETSC_USE_COMPLEX) 2142 F->ops->getinertia = NULL; 2143 #else 2144 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2145 #endif 2146 2147 mumps->matstruc = SAME_NONZERO_PATTERN; 2148 PetscFunctionReturn(PETSC_SUCCESS); 2149 } 2150 2151 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2152 { 2153 PetscBool iascii; 2154 PetscViewerFormat format; 2155 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2156 2157 PetscFunctionBegin; 2158 /* check if matrix is mumps type */ 2159 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2160 2161 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2162 if (iascii) { 2163 PetscCall(PetscViewerGetFormat(viewer, &format)); 2164 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2165 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2166 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2167 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2168 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2169 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2170 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2171 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2172 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2173 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2174 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2175 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2176 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2177 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2178 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2179 if (mumps->id.ICNTL(11) > 0) { 2180 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2181 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2182 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2183 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2184 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2185 PetscCall(PetscViewerASCIIPrintf(viewer, " 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2186 } 2187 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2188 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2189 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2190 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2191 /* ICNTL(15-17) not used */ 2192 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2193 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2194 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2195 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2196 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2197 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2198 2199 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2200 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2201 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2202 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2203 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2204 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2205 2206 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2207 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2208 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2209 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2210 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2211 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2212 2213 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2214 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2215 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2216 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2217 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2218 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2219 2220 /* information local to 
each processor */ 2221 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2222 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2223 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2224 PetscCall(PetscViewerFlush(viewer)); 2225 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2226 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2227 PetscCall(PetscViewerFlush(viewer)); 2228 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2229 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2230 PetscCall(PetscViewerFlush(viewer)); 2231 2232 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2233 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2234 PetscCall(PetscViewerFlush(viewer)); 2235 2236 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2237 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2238 PetscCall(PetscViewerFlush(viewer)); 2239 2240 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2241 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2242 PetscCall(PetscViewerFlush(viewer)); 2243 2244 if (mumps->ninfo && mumps->ninfo <= 80) { 2245 PetscInt i; 2246 for (i = 0; i < mumps->ninfo; i++) { 2247 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2248 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2249 PetscCall(PetscViewerFlush(viewer)); 2250 } 2251 } 2252 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2253 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2254 2255 if (mumps->myid == 0) { /* information from the host */ 2256 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2257 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2258 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2259 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2260 2261 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2262 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2263 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2264 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2265 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2266 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2267 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2268 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2269 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2270 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2271 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2272 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2273 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2274 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2275 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2276 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2277 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2278 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 
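          /* INFOG(21)/INFOG(22) below report the memory actually used during factorization, as opposed to the INFOG(16)/INFOG(17) estimates printed above */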
2279 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2280 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2281 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2282 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2283 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2284 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2285 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2286 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2287 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2288 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2289 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2290 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2291 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2292 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2293 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2294 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2295 } 2296 } 2297 } 2298 PetscFunctionReturn(PETSC_SUCCESS); 2299 } 2300 2301 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2302 { 2303 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2304 2305 PetscFunctionBegin; 2306 info->block_size = 1.0; 2307 info->nz_allocated = mumps->id.INFOG(20); 2308 info->nz_used = mumps->id.INFOG(20); 2309 info->nz_unneeded = 0.0; 2310 info->assemblies = 0.0; 2311 info->mallocs = 0.0; 2312 info->memory = 0.0; 2313 info->fill_ratio_given = 0; 2314 info->fill_ratio_needed = 0; 2315 info->factor_mallocs = 0; 2316 PetscFunctionReturn(PETSC_SUCCESS); 2317 } 2318 2319 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2320 { 2321 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2322 const PetscScalar *arr; 2323 const PetscInt *idxs; 2324 
PetscInt size, i; 2325 2326 PetscFunctionBegin; 2327 PetscCall(ISGetLocalSize(is, &size)); 2328 /* Schur complement matrix */ 2329 PetscCall(MatDestroy(&F->schur)); 2330 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2331 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2332 mumps->id.schur = (MumpsScalar *)arr; 2333 mumps->id.size_schur = size; 2334 mumps->id.schur_lld = size; 2335 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2336 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2337 2338 /* MUMPS expects Fortran style indices */ 2339 PetscCall(PetscFree(mumps->id.listvar_schur)); 2340 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2341 PetscCall(ISGetIndices(is, &idxs)); 2342 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2343 PetscCall(ISRestoreIndices(is, &idxs)); 2344 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2345 mumps->id.ICNTL(26) = -1; 2346 PetscFunctionReturn(PETSC_SUCCESS); 2347 } 2348 2349 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2350 { 2351 Mat St; 2352 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2353 PetscScalar *array; 2354 #if defined(PETSC_USE_COMPLEX) 2355 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2356 #endif 2357 2358 PetscFunctionBegin; 2359 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2360 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2361 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2362 PetscCall(MatSetType(St, MATDENSE)); 2363 PetscCall(MatSetUp(St)); 2364 PetscCall(MatDenseGetArray(St, &array)); 2365 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2366 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2367 PetscInt i, j, N = mumps->id.size_schur; 2368 for (i = 0; i < N; i++) { 2369 for (j = 0; j < N; j++) { 2370 #if !defined(PETSC_USE_COMPLEX) 2371 PetscScalar val = mumps->id.schur[i * N + j]; 2372 #else 2373 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2374 #endif 2375 array[j * N + i] = val; 2376 } 2377 } 2378 } else { /* stored by columns */ 2379 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2380 } 2381 } else { /* either full or lower-triangular (not packed) */ 2382 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2383 PetscInt i, j, N = mumps->id.size_schur; 2384 for (i = 0; i < N; i++) { 2385 for (j = i; j < N; j++) { 2386 #if !defined(PETSC_USE_COMPLEX) 2387 PetscScalar val = mumps->id.schur[i * N + j]; 2388 #else 2389 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2390 #endif 2391 array[i * N + j] = val; 2392 array[j * N + i] = val; 2393 } 2394 } 2395 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2396 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2397 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2398 PetscInt i, j, N = mumps->id.size_schur; 2399 for (i = 0; i < N; i++) { 2400 for (j = 0; j < i + 1; j++) { 2401 #if !defined(PETSC_USE_COMPLEX) 2402 PetscScalar val = mumps->id.schur[i * N + j]; 2403 #else 2404 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2405 #endif 2406 array[i * N + j] = val; 
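          /* mirror the entry into the (j,i) position so the Schur complement returned to PETSc is stored as a full dense matrix */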
2407 array[j * N + i] = val; 2408 } 2409 } 2410 } 2411 } 2412 PetscCall(MatDenseRestoreArray(St, &array)); 2413 *S = St; 2414 PetscFunctionReturn(PETSC_SUCCESS); 2415 } 2416 2417 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2418 { 2419 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2420 2421 PetscFunctionBegin; 2422 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2423 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2424 for (i = 0; i < nICNTL_pre; ++i) 2425 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2426 if (i == nICNTL_pre) { /* not already cached */ 2427 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2428 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2429 mumps->ICNTL_pre[0]++; 2430 } 2431 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2432 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2433 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2434 PetscFunctionReturn(PETSC_SUCCESS); 2435 } 2436 2437 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2438 { 2439 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2440 2441 PetscFunctionBegin; 2442 *ival = mumps->id.ICNTL(icntl); 2443 PetscFunctionReturn(PETSC_SUCCESS); 2444 } 2445 2446 /*@ 2447 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2448 2449 Logically Collective 2450 2451 Input Parameters: 2452 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2453 . icntl - index of MUMPS parameter array ICNTL() 2454 - ival - value of MUMPS ICNTL(icntl) 2455 2456 Options Database Key: 2457 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2458 2459 Level: beginner 2460 2461 References: 2462 . * - MUMPS Users' Guide 2463 2464 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2465 @*/ 2466 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2467 { 2468 PetscFunctionBegin; 2469 PetscValidType(F, 1); 2470 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2471 PetscValidLogicalCollectiveInt(F, icntl, 2); 2472 PetscValidLogicalCollectiveInt(F, ival, 3); 2473 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2474 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2475 PetscFunctionReturn(PETSC_SUCCESS); 2476 } 2477 2478 /*@ 2479 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2480 2481 Logically Collective 2482 2483 Input Parameters: 2484 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2485 - icntl - index of MUMPS parameter array ICNTL() 2486 2487 Output Parameter: 2488 . ival - value of MUMPS ICNTL(icntl) 2489 2490 Level: beginner 2491 2492 References: 2493 . 
* - MUMPS Users' Guide 2494 2495 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2496 @*/ 2497 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2498 { 2499 PetscFunctionBegin; 2500 PetscValidType(F, 1); 2501 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2502 PetscValidLogicalCollectiveInt(F, icntl, 2); 2503 PetscValidIntPointer(ival, 3); 2504 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2505 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2506 PetscFunctionReturn(PETSC_SUCCESS); 2507 } 2508 2509 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2510 { 2511 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2512 2513 PetscFunctionBegin; 2514 if (mumps->id.job == JOB_NULL) { 2515 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2516 for (i = 0; i < nCNTL_pre; ++i) 2517 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2518 if (i == nCNTL_pre) { 2519 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2520 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2521 mumps->CNTL_pre[0]++; 2522 } 2523 mumps->CNTL_pre[1 + 2 * i] = icntl; 2524 mumps->CNTL_pre[2 + 2 * i] = val; 2525 } else mumps->id.CNTL(icntl) = val; 2526 PetscFunctionReturn(PETSC_SUCCESS); 2527 } 2528 2529 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2530 { 2531 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2532 2533 PetscFunctionBegin; 2534 *val = mumps->id.CNTL(icntl); 2535 PetscFunctionReturn(PETSC_SUCCESS); 2536 } 2537 2538 /*@ 2539 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2540 2541 Logically Collective 2542 2543 Input Parameters: 2544 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2545 . icntl - index of MUMPS parameter array CNTL() 2546 - val - value of MUMPS CNTL(icntl) 2547 2548 Options Database Key: 2549 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2550 2551 Level: beginner 2552 2553 References: 2554 . 
* - MUMPS Users' Guide 2555 2556 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2557 @*/ 2558 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2559 { 2560 PetscFunctionBegin; 2561 PetscValidType(F, 1); 2562 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2563 PetscValidLogicalCollectiveInt(F, icntl, 2); 2564 PetscValidLogicalCollectiveReal(F, val, 3); 2565 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2566 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2567 PetscFunctionReturn(PETSC_SUCCESS); 2568 } 2569 2570 /*@ 2571 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2572 2573 Logically Collective 2574 2575 Input Parameters: 2576 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2577 - icntl - index of MUMPS parameter array CNTL() 2578 2579 Output Parameter: 2580 . val - value of MUMPS CNTL(icntl) 2581 2582 Level: beginner 2583 2584 References: 2585 . * - MUMPS Users' Guide 2586 2587 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2588 @*/ 2589 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2590 { 2591 PetscFunctionBegin; 2592 PetscValidType(F, 1); 2593 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2594 PetscValidLogicalCollectiveInt(F, icntl, 2); 2595 PetscValidRealPointer(val, 3); 2596 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2597 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2598 PetscFunctionReturn(PETSC_SUCCESS); 2599 } 2600 2601 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2602 { 2603 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2604 2605 PetscFunctionBegin; 2606 *info = mumps->id.INFO(icntl); 2607 PetscFunctionReturn(PETSC_SUCCESS); 2608 } 2609 2610 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2611 { 2612 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2613 2614 PetscFunctionBegin; 2615 *infog = mumps->id.INFOG(icntl); 2616 PetscFunctionReturn(PETSC_SUCCESS); 2617 } 2618 2619 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2620 { 2621 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2622 2623 PetscFunctionBegin; 2624 *rinfo = mumps->id.RINFO(icntl); 2625 PetscFunctionReturn(PETSC_SUCCESS); 2626 } 2627 2628 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2629 { 2630 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2631 2632 PetscFunctionBegin; 2633 *rinfog = mumps->id.RINFOG(icntl); 2634 PetscFunctionReturn(PETSC_SUCCESS); 2635 } 2636 2637 PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2638 { 2639 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2640 2641 PetscFunctionBegin; 2642 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2643 *size = 0; 2644 *array = NULL; 2645 if 
(!mumps->myid) { 2646 *size = mumps->id.INFOG(28); 2647 PetscCall(PetscMalloc1(*size, array)); 2648 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2649 } 2650 PetscFunctionReturn(PETSC_SUCCESS); 2651 } 2652 2653 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2654 { 2655 Mat Bt = NULL, Btseq = NULL; 2656 PetscBool flg; 2657 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2658 PetscScalar *aa; 2659 PetscInt spnr, *ia, *ja, M, nrhs; 2660 2661 PetscFunctionBegin; 2662 PetscValidPointer(spRHS, 2); 2663 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2664 if (flg) { 2665 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2666 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL"); 2667 2668 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2669 2670 if (mumps->petsc_size > 1) { 2671 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2672 Btseq = b->A; 2673 } else { 2674 Btseq = Bt; 2675 } 2676 2677 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2678 mumps->id.nrhs = nrhs; 2679 mumps->id.lrhs = M; 2680 mumps->id.rhs = NULL; 2681 2682 if (!mumps->myid) { 2683 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2684 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2685 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2686 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2687 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2688 } else { 2689 mumps->id.irhs_ptr = NULL; 2690 mumps->id.irhs_sparse = NULL; 2691 mumps->id.nz_rhs = 0; 2692 mumps->id.rhs_sparse = NULL; 2693 } 2694 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2695 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2696 2697 /* solve phase */ 2698 mumps->id.job = JOB_SOLVE; 2699 PetscMUMPS_c(mumps); 2700 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 2701 2702 if (!mumps->myid) { 2703 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2704 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2705 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2706 } 2707 PetscFunctionReturn(PETSC_SUCCESS); 2708 } 2709 2710 /*@ 2711 MatMumpsGetInverse - Get a user-specified set of entries in the inverse of `A` 2712 2713 Logically Collective 2714 2715 Input Parameter: 2716 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2717 2718 Output Parameter: 2719 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with the requested entries of the inverse of `A` 2720 2721 Level: beginner 2722 2723 References: 2724 .
* - MUMPS Users' Guide 2725 2726 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2727 @*/ 2728 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2729 { 2730 PetscFunctionBegin; 2731 PetscValidType(F, 1); 2732 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2733 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2734 PetscFunctionReturn(PETSC_SUCCESS); 2735 } 2736 2737 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2738 { 2739 Mat spRHS; 2740 2741 PetscFunctionBegin; 2742 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 2743 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 2744 PetscCall(MatDestroy(&spRHS)); 2745 PetscFunctionReturn(PETSC_SUCCESS); 2746 } 2747 2748 /*@ 2749 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 2750 2751 Logically Collective 2752 2753 Input Parameter: 2754 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2755 2756 Output Parameter: 2757 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 2758 2759 Level: beginner 2760 2761 References: 2762 . * - MUMPS Users' Guide 2763 2764 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2765 @*/ 2766 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 2767 { 2768 PetscBool flg; 2769 2770 PetscFunctionBegin; 2771 PetscValidType(F, 1); 2772 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2773 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 2774 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 2775 2776 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 2777 PetscFunctionReturn(PETSC_SUCCESS); 2778 } 2779 2780 /*@ 2781 MatMumpsGetInfo - Get MUMPS parameter INFO() 2782 2783 Logically Collective 2784 2785 Input Parameters: 2786 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2787 - icntl - index of MUMPS parameter array INFO() 2788 2789 Output Parameter: 2790 . ival - value of MUMPS INFO(icntl) 2791 2792 Level: beginner 2793 2794 References: 2795 . * - MUMPS Users' Guide 2796 2797 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2798 @*/ 2799 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 2800 { 2801 PetscFunctionBegin; 2802 PetscValidType(F, 1); 2803 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2804 PetscValidIntPointer(ival, 3); 2805 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2806 PetscFunctionReturn(PETSC_SUCCESS); 2807 } 2808 2809 /*@ 2810 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2811 2812 Logically Collective 2813 2814 Input Parameters: 2815 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2816 - icntl - index of MUMPS parameter array INFOG() 2817 2818 Output Parameter: 2819 . ival - value of MUMPS INFOG(icntl) 2820 2821 Level: beginner 2822 2823 References: 2824 . 
* - MUMPS Users' Guide 2825 2826 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2827 @*/ 2828 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 2829 { 2830 PetscFunctionBegin; 2831 PetscValidType(F, 1); 2832 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2833 PetscValidIntPointer(ival, 3); 2834 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2835 PetscFunctionReturn(PETSC_SUCCESS); 2836 } 2837 2838 /*@ 2839 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2840 2841 Logically Collective 2842 2843 Input Parameters: 2844 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2845 - icntl - index of MUMPS parameter array RINFO() 2846 2847 Output Parameter: 2848 . val - value of MUMPS RINFO(icntl) 2849 2850 Level: beginner 2851 2852 References: 2853 . * - MUMPS Users' Guide 2854 2855 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 2856 @*/ 2857 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 2858 { 2859 PetscFunctionBegin; 2860 PetscValidType(F, 1); 2861 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2862 PetscValidRealPointer(val, 3); 2863 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2864 PetscFunctionReturn(PETSC_SUCCESS); 2865 } 2866 2867 /*@ 2868 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2869 2870 Logically Collective 2871 2872 Input Parameters: 2873 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2874 - icntl - index of MUMPS parameter array RINFOG() 2875 2876 Output Parameter: 2877 . val - value of MUMPS RINFOG(icntl) 2878 2879 Level: beginner 2880 2881 References: 2882 . * - MUMPS Users' Guide 2883 2884 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 2885 @*/ 2886 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 2887 { 2888 PetscFunctionBegin; 2889 PetscValidType(F, 1); 2890 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2891 PetscValidRealPointer(val, 3); 2892 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2893 PetscFunctionReturn(PETSC_SUCCESS); 2894 } 2895 2896 /*@ 2897 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 2898 2899 Logically Collective 2900 2901 Input Parameter: 2902 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2903 2904 Output Parameters: 2905 + size - local size of the array. The size of the array is non-zero only on the host. 2906 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 2907 for freeing this array. 2908 2909 Level: beginner 2910 2911 References: 2912 . 
* - MUMPS Users' Guide 2913 2914 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 2915 @*/ 2916 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 2917 { 2918 PetscFunctionBegin; 2919 PetscValidType(F, 1); 2920 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2921 PetscValidIntPointer(size, 3); 2922 PetscValidPointer(array, 4); 2923 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 2924 PetscFunctionReturn(PETSC_SUCCESS); 2925 } 2926 2927 /*MC 2928 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2929 distributed and sequential matrices via the external package MUMPS. 2930 2931 Works with `MATAIJ` and `MATSBAIJ` matrices 2932 2933 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2934 2935 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 2936 See details below. 2937 2938 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 2939 2940 Options Database Keys: 2941 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2942 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 2943 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2944 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2945 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 2946 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2947 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2948 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2949 . -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps 2950 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2951 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2952 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2953 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2954 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 2955 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2956 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 2957 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2958 . -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (in MB) that MUMPS can allocate per processor 2959 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2960 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2961 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 2962 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 2963 .
-mat_mumps_icntl_29 - ICNTL(29): parallel ordering: 1 = ptscotch, 2 = parmetis 2964 . -mat_mumps_icntl_30 - ICNTL(30): compute a user-specified set of entries in inv(A) 2965 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2966 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2967 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2968 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2969 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2970 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2971 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion for iterative refinement 2972 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2973 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2974 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2975 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2976 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2977 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2978 2979 Level: beginner 2980 2981 Notes: 2982 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 2983 error if the matrix is Hermitian. 2984 2985 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 2986 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 2987 2988 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 2989 the failure with 2990 .vb 2991 KSPGetPC(ksp,&pc); 2992 PCFactorGetMatrix(pc,&mat); 2993 MatMumpsGetInfo(mat,....); 2994 MatMumpsGetInfog(mat,....); etc. 2995 .ve 2996 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 2997 2998 MUMPS provides 64-bit integer support in two build modes: 2999 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3000 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3001 3002 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3003 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3004 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3005 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3006 3007 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3008 3009 There are two modes to run MUMPS/PETSc with OpenMP: 3010 .vb 3011 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores.
For example, if you want to have 16 OpenMP 3012 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3013 .ve 3014 3015 .vb 3016 Use -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3017 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3018 .ve 3019 3020 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3021 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3022 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3023 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3024 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 3025 3026 If you run your code through a job submission system, there are caveats regarding MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3027 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3028 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3029 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (as might be set 3030 by CPU binding policies in job scripts) and make them available to the master, so that the OpenMP threads spawned by MUMPS can run on those CPUs. 3031 On a multi-socket compute node, MPI rank mapping matters. Using the example above, suppose your compute node has two sockets: 3032 if you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind 3033 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and the threads it spawns) will use half of the cores on socket 0 and half 3034 of the cores on socket 1, which definitely hurts locality. If instead you map MPI ranks consecutively onto the two sockets, the 3035 problem does not occur. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3036 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3037 examine the mapping result. 3038 3039 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3040 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3041 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3042 3043 References: 3044 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3045 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3046 3047 .seealso: [](chapter_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3048 M*/ 3049 3050 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3051 { 3052 PetscFunctionBegin; 3053 *type = MATSOLVERMUMPS; 3054 PetscFunctionReturn(PETSC_SUCCESS); 3055 } 3056 3057 /* MatGetFactor for Seq and MPI AIJ matrices */ 3058 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3059 { 3060 Mat B; 3061 Mat_MUMPS *mumps; 3062 PetscBool isSeqAIJ; 3063 PetscMPIInt size; 3064 3065 PetscFunctionBegin; 3066 #if defined(PETSC_USE_COMPLEX) 3067 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3068 #endif 3069 /* Create the factorization matrix */ 3070 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3071 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3072 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3073 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3074 PetscCall(MatSetUp(B)); 3075 3076 PetscCall(PetscNew(&mumps)); 3077 3078 B->ops->view = MatView_MUMPS; 3079 B->ops->getinfo = MatGetInfo_MUMPS; 3080 3081 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3082 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3083 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3084 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3085 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3086 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3087 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3088 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3089 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3090 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3091 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3092 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3093 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3094 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3095 3096 if (ftype == MAT_FACTOR_LU) { 3097 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3098 B->factortype = MAT_FACTOR_LU; 3099 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3100 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3101 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3102 mumps->sym = 0; 3103 } else { 3104 B->ops->choleskyfactorsymbolic = 
MatCholeskyFactorSymbolic_MUMPS; 3105 B->factortype = MAT_FACTOR_CHOLESKY; 3106 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3107 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3108 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3109 #if defined(PETSC_USE_COMPLEX) 3110 mumps->sym = 2; 3111 #else 3112 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3113 else mumps->sym = 2; 3114 #endif 3115 } 3116 3117 /* set solvertype */ 3118 PetscCall(PetscFree(B->solvertype)); 3119 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3120 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3121 if (size == 1) { 3122 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3123 B->canuseordering = PETSC_TRUE; 3124 } 3125 B->ops->destroy = MatDestroy_MUMPS; 3126 B->data = (void *)mumps; 3127 3128 *F = B; 3129 mumps->id.job = JOB_NULL; 3130 mumps->ICNTL_pre = NULL; 3131 mumps->CNTL_pre = NULL; 3132 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3133 PetscFunctionReturn(PETSC_SUCCESS); 3134 } 3135 3136 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3137 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3138 { 3139 Mat B; 3140 Mat_MUMPS *mumps; 3141 PetscBool isSeqSBAIJ; 3142 PetscMPIInt size; 3143 3144 PetscFunctionBegin; 3145 #if defined(PETSC_USE_COMPLEX) 3146 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3147 #endif 3148 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3149 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3150 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3151 PetscCall(MatSetUp(B)); 3152 3153 PetscCall(PetscNew(&mumps)); 3154 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3155 if (isSeqSBAIJ) { 3156 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3157 } else { 3158 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3159 } 3160 3161 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3162 B->ops->view = MatView_MUMPS; 3163 B->ops->getinfo = MatGetInfo_MUMPS; 3164 3165 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3166 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3167 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3168 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3169 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3170 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3171 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3172 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3173 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3174 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3175 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", 
MatMumpsGetRinfog_MUMPS)); 3176 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3177 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3178 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3179 3180 B->factortype = MAT_FACTOR_CHOLESKY; 3181 #if defined(PETSC_USE_COMPLEX) 3182 mumps->sym = 2; 3183 #else 3184 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3185 else mumps->sym = 2; 3186 #endif 3187 3188 /* set solvertype */ 3189 PetscCall(PetscFree(B->solvertype)); 3190 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3191 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3192 if (size == 1) { 3193 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3194 B->canuseordering = PETSC_TRUE; 3195 } 3196 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3197 B->ops->destroy = MatDestroy_MUMPS; 3198 B->data = (void *)mumps; 3199 3200 *F = B; 3201 mumps->id.job = JOB_NULL; 3202 mumps->ICNTL_pre = NULL; 3203 mumps->CNTL_pre = NULL; 3204 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3205 PetscFunctionReturn(PETSC_SUCCESS); 3206 } 3207 3208 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3209 { 3210 Mat B; 3211 Mat_MUMPS *mumps; 3212 PetscBool isSeqBAIJ; 3213 PetscMPIInt size; 3214 3215 PetscFunctionBegin; 3216 /* Create the factorization matrix */ 3217 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3218 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3219 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3220 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3221 PetscCall(MatSetUp(B)); 3222 3223 PetscCall(PetscNew(&mumps)); 3224 if (ftype == MAT_FACTOR_LU) { 3225 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3226 B->factortype = MAT_FACTOR_LU; 3227 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3228 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3229 mumps->sym = 0; 3230 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3231 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3232 3233 B->ops->view = MatView_MUMPS; 3234 B->ops->getinfo = MatGetInfo_MUMPS; 3235 3236 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3237 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3238 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3239 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3240 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3241 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3242 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3243 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3244 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3245 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3246 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3247 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3248 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3249 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3250 3251 /* set solvertype */ 3252 PetscCall(PetscFree(B->solvertype)); 3253 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3254 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3255 if (size == 1) { 3256 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3257 B->canuseordering = PETSC_TRUE; 3258 } 3259 B->ops->destroy = MatDestroy_MUMPS; 3260 B->data = (void *)mumps; 3261 3262 *F = B; 3263 mumps->id.job = JOB_NULL; 3264 mumps->ICNTL_pre = NULL; 3265 mumps->CNTL_pre = NULL; 3266 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3267 PetscFunctionReturn(PETSC_SUCCESS); 3268 } 3269 3270 /* MatGetFactor for Seq and MPI SELL matrices */ 3271 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3272 { 3273 Mat B; 3274 Mat_MUMPS *mumps; 3275 PetscBool isSeqSELL; 3276 PetscMPIInt size; 3277 3278 PetscFunctionBegin; 3279 /* Create the factorization matrix */ 3280 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3281 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3282 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3283 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3284 PetscCall(MatSetUp(B)); 3285 3286 PetscCall(PetscNew(&mumps)); 3287 3288 B->ops->view = MatView_MUMPS; 3289 B->ops->getinfo = MatGetInfo_MUMPS; 3290 3291 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3292 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3293 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3294 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3295 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3296 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3297 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3298 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3299 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3300 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3301 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3302 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3303 3304 if (ftype == MAT_FACTOR_LU) { 3305 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3306 B->factortype = 
MAT_FACTOR_LU; 3307 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3308 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3309 mumps->sym = 0; 3310 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3311 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3312 3313 /* set solvertype */ 3314 PetscCall(PetscFree(B->solvertype)); 3315 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3316 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3317 if (size == 1) { 3318 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3319 B->canuseordering = PETSC_TRUE; 3320 } 3321 B->ops->destroy = MatDestroy_MUMPS; 3322 B->data = (void *)mumps; 3323 3324 *F = B; 3325 mumps->id.job = JOB_NULL; 3326 mumps->ICNTL_pre = NULL; 3327 mumps->CNTL_pre = NULL; 3328 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3329 PetscFunctionReturn(PETSC_SUCCESS); 3330 } 3331 3332 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3333 { 3334 PetscFunctionBegin; 3335 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3336 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3337 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3338 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3339 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3340 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3341 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3342 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3343 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3344 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3345 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3346 PetscFunctionReturn(PETSC_SUCCESS); 3347 } 3348
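/*
   Example usage (an illustrative sketch, not compiled as part of this file): factor a matrix A with
   MATSOLVERMUMPS through the MatGetFactor() path and solve A x = b, setting one MUMPS control
   parameter with MatMumpsSetIcntl(). The Mat A and Vec b, x are assumed to exist already; the value
   30 for ICNTL(14) (percentage increase of the estimated working space) is only an example, and since
   it is set before the symbolic factorization it is cached by the interface until MUMPS is first called.
   The PETSc ordering passed to MatLUFactorSymbolic() may be ignored by MUMPS, which can compute its own.

     Mat           F;
     IS            rowperm, colperm;
     MatFactorInfo info;

     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatMumpsSetIcntl(F, 14, 30));
     PetscCall(MatGetOrdering(A, MATORDERINGND, &rowperm, &colperm));
     PetscCall(MatLUFactorSymbolic(F, A, rowperm, colperm, &info));
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatSolve(F, b, x));
     PetscCall(ISDestroy(&rowperm));
     PetscCall(ISDestroy(&colperm));
     PetscCall(MatDestroy(&F));

   The same effect is obtained inside a KSP/PC solve with the command line options
   -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30 (see the MATSOLVERMUMPS
   manual page above).
*/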