1 /* 2 Provides an interface to the MUMPS sparse solver 3 */ 4 #include <petscpkg_version.h> 5 #include <petscsf.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 11 12 EXTERN_C_BEGIN 13 #if defined(PETSC_USE_COMPLEX) 14 #if defined(PETSC_USE_REAL_SINGLE) 15 #include <cmumps_c.h> 16 #else 17 #include <zmumps_c.h> 18 #endif 19 #else 20 #if defined(PETSC_USE_REAL_SINGLE) 21 #include <smumps_c.h> 22 #else 23 #include <dmumps_c.h> 24 #endif 25 #endif 26 EXTERN_C_END 27 #define JOB_INIT -1 28 #define JOB_NULL 0 29 #define JOB_FACTSYMBOLIC 1 30 #define JOB_FACTNUMERIC 2 31 #define JOB_SOLVE 3 32 #define JOB_END -2 33 34 /* calls to MUMPS */ 35 #if defined(PETSC_USE_COMPLEX) 36 #if defined(PETSC_USE_REAL_SINGLE) 37 #define MUMPS_c cmumps_c 38 #else 39 #define MUMPS_c zmumps_c 40 #endif 41 #else 42 #if defined(PETSC_USE_REAL_SINGLE) 43 #define MUMPS_c smumps_c 44 #else 45 #define MUMPS_c dmumps_c 46 #endif 47 #endif 48 49 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 50 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 51 naming convention in PetscMPIInt, PetscBLASInt etc. 52 */ 53 typedef MUMPS_INT PetscMUMPSInt; 54 55 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 56 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 57 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 58 #endif 59 #else 60 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 61 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 62 #endif 63 #endif 64 65 #define MPIU_MUMPSINT MPI_INT 66 #define PETSC_MUMPS_INT_MAX 2147483647 67 #define PETSC_MUMPS_INT_MIN -2147483648 68 69 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 70 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 71 { 72 PetscFunctionBegin; 73 #if PetscDefined(USE_64BIT_INDICES) 74 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 75 #endif 76 *b = (PetscMUMPSInt)(a); 77 PetscFunctionReturn(PETSC_SUCCESS); 78 } 79 80 /* Put these utility routines here since they are only used in this file */ 81 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 82 { 83 PetscInt myval; 84 PetscBool myset; 85 PetscFunctionBegin; 86 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 87 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 88 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 89 if (set) *set = myset; 90 PetscFunctionReturn(PETSC_SUCCESS); 91 } 92 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 93 94 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 95 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 96 #define PetscMUMPS_c(mumps) \ 97 do { \ 98 if (mumps->use_petsc_omp_support) { \ 99 if (mumps->is_omp_master) { \ 100 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 101 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 102 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 103 PetscCall(PetscFPTrapPop()); \ 104 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 105 } \ 106 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 107 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 108 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 109 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 110 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 111 */ \ 112 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 113 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 114 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 115 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 116 } else { \ 117 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 118 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 119 PetscCall(PetscFPTrapPop()); \ 120 } \ 121 } while (0) 122 #else 123 #define PetscMUMPS_c(mumps) \ 124 do { \ 125 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 126 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 127 PetscCall(PetscFPTrapPop()); \ 128 } while (0) 129 #endif 130 131 /* declare MumpsScalar */ 132 #if defined(PETSC_USE_COMPLEX) 133 #if defined(PETSC_USE_REAL_SINGLE) 134 #define MumpsScalar mumps_complex 135 #else 136 #define MumpsScalar mumps_double_complex 137 #endif 138 #else 139 #define MumpsScalar PetscScalar 140 #endif 141 142 /* macros s.t. indices match MUMPS documentation */ 143 #define ICNTL(I) icntl[(I)-1] 144 #define CNTL(I) cntl[(I)-1] 145 #define INFOG(I) infog[(I)-1] 146 #define INFO(I) info[(I)-1] 147 #define RINFOG(I) rinfog[(I)-1] 148 #define RINFO(I) rinfo[(I)-1] 149 150 typedef struct Mat_MUMPS Mat_MUMPS; 151 struct Mat_MUMPS { 152 #if defined(PETSC_USE_COMPLEX) 153 #if defined(PETSC_USE_REAL_SINGLE) 154 CMUMPS_STRUC_C id; 155 #else 156 ZMUMPS_STRUC_C id; 157 #endif 158 #else 159 #if defined(PETSC_USE_REAL_SINGLE) 160 SMUMPS_STRUC_C id; 161 #else 162 DMUMPS_STRUC_C id; 163 #endif 164 #endif 165 166 MatStructure matstruc; 167 PetscMPIInt myid, petsc_size; 168 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 169 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 170 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 171 PetscMUMPSInt sym; 172 MPI_Comm mumps_comm; 173 PetscMUMPSInt *ICNTL_pre; 174 PetscReal *CNTL_pre; 175 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 176 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 177 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 178 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 179 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 180 PetscInt *rhs_nrow, max_nrhs; 181 PetscMPIInt *rhs_recvcounts, *rhs_disps; 182 PetscScalar *rhs_loc, *rhs_recvbuf; 183 #endif 184 Vec b_seq, x_seq; 185 PetscInt ninfo, *info; /* which INFO to display */ 186 PetscInt sizeredrhs; 187 PetscScalar *schur_sol; 188 PetscInt schur_sizesol; 189 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 190 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 191 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 192 193 /* Support for MATNEST */ 194 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 195 PetscInt64 *nest_vals_start; 196 PetscScalar *nest_vals; 197 198 /* stuff used by petsc/mumps OpenMP support*/ 199 PetscBool use_petsc_omp_support; 200 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 201 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 202 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 203 PetscMPIInt tag, omp_comm_size; 204 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 205 MPI_Request *reqs; 206 }; 207 208 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 209 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 210 */ 211 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 212 { 213 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 214 215 PetscFunctionBegin; 216 #if defined(PETSC_USE_64BIT_INDICES) 217 { 218 PetscInt i; 219 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 220 PetscCall(PetscFree(mumps->ia_alloc)); 221 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 222 mumps->cur_ilen = nrow + 1; 223 } 224 if (nnz > mumps->cur_jlen) { 225 PetscCall(PetscFree(mumps->ja_alloc)); 226 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 227 mumps->cur_jlen = nnz; 228 } 229 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 230 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 231 *ia_mumps = mumps->ia_alloc; 232 *ja_mumps = mumps->ja_alloc; 233 } 234 #else 235 *ia_mumps = ia; 236 *ja_mumps = ja; 237 #endif 238 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 243 { 244 PetscFunctionBegin; 245 PetscCall(PetscFree(mumps->id.listvar_schur)); 246 PetscCall(PetscFree(mumps->id.redrhs)); 247 PetscCall(PetscFree(mumps->schur_sol)); 248 mumps->id.size_schur = 0; 249 mumps->id.schur_lld = 0; 250 mumps->id.ICNTL(19) = 0; 251 PetscFunctionReturn(PETSC_SUCCESS); 252 } 253 254 /* solve with rhs in mumps->id.redrhs and return in the same location */ 255 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 256 { 257 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 258 Mat S, B, X; 259 MatFactorSchurStatus schurstatus; 260 PetscInt sizesol; 261 262 PetscFunctionBegin; 263 PetscCall(MatFactorFactorizeSchurComplement(F)); 264 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 265 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 266 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 267 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 268 PetscCall(MatBindToCPU(B, S->boundtocpu)); 269 #endif 270 switch (schurstatus) { 271 case MAT_FACTOR_SCHUR_FACTORED: 272 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 273 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 274 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 275 PetscCall(MatBindToCPU(X, S->boundtocpu)); 276 #endif 277 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 278 PetscCall(MatMatSolveTranspose(S, B, X)); 279 } else { 280 PetscCall(MatMatSolve(S, B, X)); 281 } 282 break; 283 case MAT_FACTOR_SCHUR_INVERTED: 284 sizesol = mumps->id.nrhs * mumps->id.size_schur; 285 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 286 PetscCall(PetscFree(mumps->schur_sol)); 287 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 288 mumps->schur_sizesol = sizesol; 289 } 290 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 291 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 292 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 293 PetscCall(MatBindToCPU(X, S->boundtocpu)); 294 #endif 295 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 296 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 297 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 298 } else { 299 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 300 } 301 PetscCall(MatProductSetFromOptions(X)); 302 PetscCall(MatProductSymbolic(X)); 303 PetscCall(MatProductNumeric(X)); 304 305 PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 306 break; 307 default: 308 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 309 } 310 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 311 PetscCall(MatDestroy(&B)); 312 PetscCall(MatDestroy(&X)); 313 PetscFunctionReturn(PETSC_SUCCESS); 314 } 315 316 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 317 { 318 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 319 320 PetscFunctionBegin; 321 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 if (!expansion) { /* prepare for the condensation step */ 325 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 326 /* allocate MUMPS internal array to store reduced right-hand sides */ 327 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 328 PetscCall(PetscFree(mumps->id.redrhs)); 329 mumps->id.lredrhs = mumps->id.size_schur; 330 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 331 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 332 } 333 } else { /* prepare for the expansion step */ 334 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 335 PetscCall(MatMumpsSolveSchur_Private(F)); 336 mumps->id.ICNTL(26) = 2; /* expansion phase */ 337 PetscMUMPS_c(mumps); 338 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 339 /* restore defaults */ 340 mumps->id.ICNTL(26) = -1; 341 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 342 if (mumps->id.nrhs > 1) { 343 PetscCall(PetscFree(mumps->id.redrhs)); 344 mumps->id.lredrhs = 0; 345 mumps->sizeredrhs = 0; 346 } 347 } 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 /* 352 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 353 354 input: 355 A - matrix in aij,baij or sbaij format 356 shift - 0: C style output triple; 1: Fortran style output triple. 357 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 358 MAT_REUSE_MATRIX: only the values in v array are updated 359 output: 360 nnz - dim of r, c, and v (number of local nonzero entries of A) 361 r, c, v - row and col index, matrix values (matrix triples) 362 363 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 364 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 365 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 366 367 */ 368 369 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 370 { 371 const PetscScalar *av; 372 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 373 PetscInt64 nz, rnz, i, j, k; 374 PetscMUMPSInt *row, *col; 375 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 376 377 PetscFunctionBegin; 378 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 379 if (reuse == MAT_INITIAL_MATRIX) { 380 nz = aa->nz; 381 ai = aa->i; 382 aj = aa->j; 383 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 384 for (i = k = 0; i < M; i++) { 385 rnz = ai[i + 1] - ai[i]; 386 ajj = aj + ai[i]; 387 for (j = 0; j < rnz; j++) { 388 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 389 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 390 k++; 391 } 392 } 393 mumps->val = (PetscScalar *)av; 394 mumps->irn = row; 395 mumps->jcn = col; 396 mumps->nnz = nz; 397 } else PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); 398 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 399 PetscFunctionReturn(PETSC_SUCCESS); 400 } 401 402 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 403 { 404 PetscInt64 nz, i, j, k, r; 405 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 406 PetscMUMPSInt *row, *col; 407 408 PetscFunctionBegin; 409 nz = a->sliidx[a->totalslices]; 410 if (reuse == MAT_INITIAL_MATRIX) { 411 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 412 for (i = k = 0; i < a->totalslices; i++) { 413 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 414 } 415 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 416 mumps->irn = row; 417 mumps->jcn = col; 418 mumps->nnz = nz; 419 mumps->val = a->val; 420 } else PetscCall(PetscArraycpy(mumps->val, a->val, nz)); 421 PetscFunctionReturn(PETSC_SUCCESS); 422 } 423 424 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 425 { 426 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 427 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 428 PetscInt64 M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 429 PetscInt bs; 430 PetscMUMPSInt *row, *col; 431 432 PetscFunctionBegin; 433 if (reuse == MAT_INITIAL_MATRIX) { 434 PetscCall(MatGetBlockSize(A, &bs)); 435 M = A->rmap->N / bs; 436 ai = aa->i; 437 aj = aa->j; 438 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 439 for (i = 0; i < M; i++) { 440 ajj = aj + ai[i]; 441 rnz = ai[i + 1] - ai[i]; 442 for (k = 0; k < rnz; k++) { 443 for (j = 0; j < bs; j++) { 444 for (m = 0; m < bs; m++) { 445 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 446 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 447 idx++; 448 } 449 } 450 } 451 } 452 mumps->irn = row; 453 mumps->jcn = col; 454 mumps->nnz = nz; 455 mumps->val = aa->a; 456 } else PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); 457 PetscFunctionReturn(PETSC_SUCCESS); 458 } 459 460 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 461 { 462 const PetscInt *ai, *aj, *ajj; 463 PetscInt bs; 464 PetscInt64 nz, rnz, i, j, k, m; 465 PetscMUMPSInt *row, *col; 466 PetscScalar *val; 467 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 468 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 469 #if defined(PETSC_USE_COMPLEX) 470 PetscBool isset, hermitian; 471 #endif 472 473 PetscFunctionBegin; 474 #if defined(PETSC_USE_COMPLEX) 475 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 476 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 477 #endif 478 ai = aa->i; 479 aj = aa->j; 480 PetscCall(MatGetBlockSize(A, &bs)); 481 if (reuse == MAT_INITIAL_MATRIX) { 482 const PetscInt64 alloc_size = aa->nz * bs2; 483 484 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 485 if (bs > 1) { 486 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 487 mumps->val = mumps->val_alloc; 488 } else { 489 mumps->val = aa->a; 490 } 491 mumps->irn = row; 492 mumps->jcn = col; 493 } else { 494 row = mumps->irn; 495 col = mumps->jcn; 496 } 497 val = mumps->val; 498 499 nz = 0; 500 if (bs > 1) { 501 for (i = 0; i < mbs; i++) { 502 rnz = ai[i + 1] - ai[i]; 503 ajj = aj + ai[i]; 504 for (j = 0; j < rnz; j++) { 505 for (k = 0; k < bs; k++) { 506 for (m = 0; m < bs; m++) { 507 if (ajj[j] > i || k >= m) { 508 if (reuse == MAT_INITIAL_MATRIX) { 509 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 510 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 511 } 512 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 513 } 514 } 515 } 516 } 517 } 518 } else if (reuse == MAT_INITIAL_MATRIX) { 519 for (i = 0; i < mbs; i++) { 520 rnz = ai[i + 1] - ai[i]; 521 ajj = aj + ai[i]; 522 for (j = 0; j < rnz; j++) { 523 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 524 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 525 nz++; 526 } 527 } 528 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 529 } else PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX */ 530 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 531 PetscFunctionReturn(PETSC_SUCCESS); 532 } 533 534 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 535 { 536 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 537 PetscInt64 nz, rnz, i, j; 538 const PetscScalar *av, *v1; 539 PetscScalar *val; 540 PetscMUMPSInt *row, *col; 541 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 542 PetscBool missing; 543 #if defined(PETSC_USE_COMPLEX) 544 PetscBool hermitian, isset; 545 #endif 546 547 PetscFunctionBegin; 548 #if defined(PETSC_USE_COMPLEX) 549 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 550 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 551 #endif 552 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 553 ai = aa->i; 554 aj = aa->j; 555 adiag = aa->diag; 556 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 557 if (reuse == MAT_INITIAL_MATRIX) { 558 /* count nz in the upper triangular part of A */ 559 nz = 0; 560 if (missing) { 561 for (i = 0; i < M; i++) { 562 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 563 for (j = ai[i]; j < ai[i + 1]; j++) { 564 if (aj[j] < i) continue; 565 nz++; 566 } 567 } else { 568 nz += ai[i + 1] - adiag[i]; 569 } 570 } 571 } else { 572 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 573 } 574 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 575 PetscCall(PetscMalloc1(nz, &val)); 576 mumps->nnz = nz; 577 mumps->irn = row; 578 mumps->jcn = col; 579 mumps->val = mumps->val_alloc = val; 580 581 nz = 0; 582 if (missing) { 583 for (i = 0; i < M; i++) { 584 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 585 for (j = ai[i]; j < ai[i + 1]; j++) { 586 if (aj[j] < i) continue; 587 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 588 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 589 val[nz] = av[j]; 590 nz++; 591 } 592 } else { 593 rnz = ai[i + 1] - adiag[i]; 594 ajj = aj + adiag[i]; 595 v1 = av + adiag[i]; 596 for (j = 0; j < rnz; j++) { 597 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 598 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 599 val[nz++] = v1[j]; 600 } 601 } 602 } 603 } else { 604 for (i = 0; i < M; i++) { 605 rnz = ai[i + 1] - adiag[i]; 606 ajj = aj + adiag[i]; 607 v1 = av + adiag[i]; 608 for (j = 0; j < rnz; j++) { 609 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 610 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 611 val[nz++] = v1[j]; 612 } 613 } 614 } 615 } else { 616 nz = 0; 617 val = mumps->val; 618 if (missing) { 619 for (i = 0; i < M; i++) { 620 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 621 for (j = ai[i]; j < ai[i + 1]; j++) { 622 if (aj[j] < i) continue; 623 val[nz++] = av[j]; 624 } 625 } else { 626 rnz = ai[i + 1] - adiag[i]; 627 v1 = av + adiag[i]; 628 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 629 } 630 } 631 } else { 632 for (i = 0; i < M; i++) { 633 rnz = ai[i + 1] - adiag[i]; 634 v1 = av + adiag[i]; 635 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 636 } 637 } 638 } 639 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 640 PetscFunctionReturn(PETSC_SUCCESS); 641 } 642 643 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 644 { 645 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 646 PetscInt bs; 647 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 648 PetscMUMPSInt *row, *col; 649 const PetscScalar *av, *bv, *v1, *v2; 650 PetscScalar *val; 651 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 652 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 653 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 654 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 655 #if defined(PETSC_USE_COMPLEX) 656 PetscBool hermitian, isset; 657 #endif 658 659 PetscFunctionBegin; 660 #if defined(PETSC_USE_COMPLEX) 661 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 662 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 663 #endif 664 PetscCall(MatGetBlockSize(A, &bs)); 665 rstart = A->rmap->rstart; 666 ai = aa->i; 667 aj = aa->j; 668 bi = bb->i; 669 bj = bb->j; 670 av = aa->a; 671 bv = bb->a; 672 673 garray = mat->garray; 674 675 if (reuse == MAT_INITIAL_MATRIX) { 676 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 677 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 678 PetscCall(PetscMalloc1(nz, &val)); 679 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 680 mumps->irn = row; 681 mumps->jcn = col; 682 mumps->val = mumps->val_alloc = val; 683 } else { 684 val = mumps->val; 685 } 686 687 jj = 0; 688 irow = rstart; 689 for (i = 0; i < mbs; i++) { 690 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 691 countA = ai[i + 1] - ai[i]; 692 countB = bi[i + 1] - bi[i]; 693 bjj = bj + bi[i]; 694 v1 = av + ai[i] * bs2; 695 v2 = bv + bi[i] * bs2; 696 697 if (bs > 1) { 698 /* A-part */ 699 for (j = 0; j < countA; j++) { 700 for (k = 0; k < bs; k++) { 701 for (m = 0; m < bs; m++) { 702 if (rstart + ajj[j] * bs > irow || k >= m) { 703 if (reuse == MAT_INITIAL_MATRIX) { 704 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 705 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 706 } 707 val[jj++] = v1[j * bs2 + m + k * bs]; 708 } 709 } 710 } 711 } 712 713 /* B-part */ 714 for (j = 0; j < countB; j++) { 715 for (k = 0; k < bs; k++) { 716 for (m = 0; m < bs; m++) { 717 if (reuse == MAT_INITIAL_MATRIX) { 718 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 719 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 720 } 721 val[jj++] = v2[j * bs2 + m + k * bs]; 722 } 723 } 724 } 725 } else { 726 /* A-part */ 727 for (j = 0; j < countA; j++) { 728 if (reuse == MAT_INITIAL_MATRIX) { 729 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 730 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 731 } 732 val[jj++] = v1[j]; 733 } 734 735 /* B-part */ 736 for (j = 0; j < countB; j++) { 737 if (reuse == MAT_INITIAL_MATRIX) { 738 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 739 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 740 } 741 val[jj++] = v2[j]; 742 } 743 } 744 irow += bs; 745 } 746 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 747 PetscFunctionReturn(PETSC_SUCCESS); 748 } 749 750 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 751 { 752 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 753 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 754 PetscMUMPSInt *row, *col; 755 const PetscScalar *av, *bv, *v1, *v2; 756 PetscScalar *val; 757 Mat Ad, Ao; 758 Mat_SeqAIJ *aa; 759 Mat_SeqAIJ *bb; 760 761 PetscFunctionBegin; 762 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 763 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 764 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 765 766 aa = (Mat_SeqAIJ *)(Ad)->data; 767 bb = (Mat_SeqAIJ *)(Ao)->data; 768 ai = aa->i; 769 aj = aa->j; 770 bi = bb->i; 771 bj = bb->j; 772 773 rstart = A->rmap->rstart; 774 cstart = A->cmap->rstart; 775 776 if (reuse == MAT_INITIAL_MATRIX) { 777 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 778 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 779 PetscCall(PetscMalloc1(nz, &val)); 780 mumps->nnz = nz; 781 mumps->irn = row; 782 mumps->jcn = col; 783 mumps->val = mumps->val_alloc = val; 784 } else { 785 val = mumps->val; 786 } 787 788 jj = 0; 789 irow = rstart; 790 for (i = 0; i < m; i++) { 791 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 792 countA = ai[i + 1] - ai[i]; 793 countB = bi[i + 1] - bi[i]; 794 bjj = bj + bi[i]; 795 v1 = av + ai[i]; 796 v2 = bv + bi[i]; 797 798 /* A-part */ 799 for (j = 0; j < countA; j++) { 800 if (reuse == MAT_INITIAL_MATRIX) { 801 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 802 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 803 } 804 val[jj++] = v1[j]; 805 } 806 807 /* B-part */ 808 for (j = 0; j < countB; j++) { 809 if (reuse == MAT_INITIAL_MATRIX) { 810 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 811 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 812 } 813 val[jj++] = v2[j]; 814 } 815 irow++; 816 } 817 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 818 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 819 PetscFunctionReturn(PETSC_SUCCESS); 820 } 821 822 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 823 { 824 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 825 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 826 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 827 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 828 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 829 const PetscInt bs2 = mat->bs2; 830 PetscInt bs; 831 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 832 PetscMUMPSInt *row, *col; 833 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 834 PetscScalar *val; 835 836 PetscFunctionBegin; 837 PetscCall(MatGetBlockSize(A, &bs)); 838 if (reuse == MAT_INITIAL_MATRIX) { 839 nz = bs2 * (aa->nz + bb->nz); 840 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 841 PetscCall(PetscMalloc1(nz, &val)); 842 mumps->nnz = nz; 843 mumps->irn = row; 844 mumps->jcn = col; 845 mumps->val = mumps->val_alloc = val; 846 } else { 847 val = mumps->val; 848 } 849 850 jj = 0; 851 irow = rstart; 852 for (i = 0; i < mbs; i++) { 853 countA = ai[i + 1] - ai[i]; 854 countB = bi[i + 1] - bi[i]; 855 ajj = aj + ai[i]; 856 bjj = bj + bi[i]; 857 v1 = av + bs2 * ai[i]; 858 v2 = bv + bs2 * bi[i]; 859 860 idx = 0; 861 /* A-part */ 862 for (k = 0; k < countA; k++) { 863 for (j = 0; j < bs; j++) { 864 for (n = 0; n < bs; n++) { 865 if (reuse == MAT_INITIAL_MATRIX) { 866 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 867 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 868 } 869 val[jj++] = v1[idx++]; 870 } 871 } 872 } 873 874 idx = 0; 875 /* B-part */ 876 for (k = 0; k < countB; k++) { 877 for (j = 0; j < bs; j++) { 878 for (n = 0; n < bs; n++) { 879 if (reuse == MAT_INITIAL_MATRIX) { 880 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 881 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 882 } 883 val[jj++] = v2[idx++]; 884 } 885 } 886 } 887 irow += bs; 888 } 889 PetscFunctionReturn(PETSC_SUCCESS); 890 } 891 892 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 893 { 894 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 895 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 896 PetscMUMPSInt *row, *col; 897 const PetscScalar *av, *bv, *v1, *v2; 898 PetscScalar *val; 899 Mat Ad, Ao; 900 Mat_SeqAIJ *aa; 901 Mat_SeqAIJ *bb; 902 #if defined(PETSC_USE_COMPLEX) 903 PetscBool hermitian, isset; 904 #endif 905 906 PetscFunctionBegin; 907 #if defined(PETSC_USE_COMPLEX) 908 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 909 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 910 #endif 911 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 912 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 913 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 914 915 aa = (Mat_SeqAIJ *)(Ad)->data; 916 bb = (Mat_SeqAIJ *)(Ao)->data; 917 ai = aa->i; 918 aj = aa->j; 919 adiag = aa->diag; 920 bi = bb->i; 921 bj = bb->j; 922 923 rstart = A->rmap->rstart; 924 925 if (reuse == MAT_INITIAL_MATRIX) { 926 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 927 nzb = 0; /* num of upper triangular entries in mat->B */ 928 for (i = 0; i < m; i++) { 929 nza += (ai[i + 1] - adiag[i]); 930 countB = bi[i + 1] - bi[i]; 931 bjj = bj + bi[i]; 932 for (j = 0; j < countB; j++) { 933 if (garray[bjj[j]] > rstart) nzb++; 934 } 935 } 936 937 nz = nza + nzb; /* total nz of upper triangular part of mat */ 938 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 939 PetscCall(PetscMalloc1(nz, &val)); 940 mumps->nnz = nz; 941 mumps->irn = row; 942 mumps->jcn = col; 943 mumps->val = mumps->val_alloc = val; 944 } else { 945 val = mumps->val; 946 } 947 948 jj = 0; 949 irow = rstart; 950 for (i = 0; i < m; i++) { 951 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 952 v1 = av + adiag[i]; 953 countA = ai[i + 1] - adiag[i]; 954 countB = bi[i + 1] - bi[i]; 955 bjj = bj + bi[i]; 956 v2 = bv + bi[i]; 957 958 /* A-part */ 959 for (j = 0; j < countA; j++) { 960 if (reuse == MAT_INITIAL_MATRIX) { 961 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 962 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 963 } 964 val[jj++] = v1[j]; 965 } 966 967 /* B-part */ 968 for (j = 0; j < countB; j++) { 969 if (garray[bjj[j]] > rstart) { 970 if (reuse == MAT_INITIAL_MATRIX) { 971 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 972 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 973 } 974 val[jj++] = v2[j]; 975 } 976 } 977 irow++; 978 } 979 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 980 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 981 PetscFunctionReturn(PETSC_SUCCESS); 982 } 983 984 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 985 { 986 const PetscScalar *av; 987 const PetscInt M = A->rmap->n; 988 PetscInt64 i; 989 PetscMUMPSInt *row, *col; 990 Vec v; 991 992 PetscFunctionBegin; 993 PetscCall(MatDiagonalGetDiagonal(A, &v)); 994 PetscCall(VecGetArrayRead(v, &av)); 995 if (reuse == MAT_INITIAL_MATRIX) { 996 PetscCall(PetscMalloc2(M, &row, M, &col)); 997 for (i = 0; i < M; i++) { 998 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 999 col[i] = row[i]; 1000 } 1001 mumps->val = (PetscScalar *)av; 1002 mumps->irn = row; 1003 mumps->jcn = col; 1004 mumps->nnz = M; 1005 } else PetscCall(PetscArraycpy(mumps->val, av, M)); 1006 PetscCall(VecRestoreArrayRead(v, &av)); 1007 PetscFunctionReturn(PETSC_SUCCESS); 1008 } 1009 1010 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1011 { 1012 PetscScalar *v; 1013 const PetscInt m = A->rmap->n, N = A->cmap->N; 1014 PetscInt lda; 1015 PetscInt64 i, j; 1016 PetscMUMPSInt *row, *col; 1017 1018 PetscFunctionBegin; 1019 PetscCall(MatDenseGetArray(A, &v)); 1020 PetscCall(MatDenseGetLDA(A, &lda)); 1021 if (reuse == MAT_INITIAL_MATRIX) { 1022 PetscCall(PetscMalloc2(m * N, &row, m * N, &col)); 1023 for (i = 0; i < m; i++) { 1024 col[i] = 0; 1025 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1026 } 1027 for (j = 1; j < N; j++) { 1028 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j)); 1029 PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m)); 1030 } 1031 if (lda == m) mumps->val = v; 1032 else { 1033 PetscCall(PetscMalloc1(m * N, &mumps->val)); 1034 mumps->val_alloc = mumps->val; 1035 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1036 } 1037 mumps->irn = row; 1038 mumps->jcn = col; 1039 mumps->nnz = m * N; 1040 } else { 1041 if (lda == m && !mumps->nest_vals) mumps->val = v; 1042 else { 1043 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1044 } 1045 } 1046 PetscCall(MatDenseRestoreArray(A, &v)); 1047 PetscFunctionReturn(PETSC_SUCCESS); 1048 } 1049 1050 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1051 { 1052 Mat **mats; 1053 PetscInt nr, nc; 1054 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 1055 1056 PetscFunctionBegin; 1057 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1058 if (reuse == MAT_INITIAL_MATRIX) { 1059 PetscMUMPSInt *irns, *jcns; 1060 PetscScalar *vals; 1061 PetscInt64 totnnz, cumnnz, maxnnz; 1062 PetscInt *pjcns_w; 1063 IS *rows, *cols; 1064 PetscInt **rows_idx, **cols_idx; 1065 1066 cumnnz = 0; 1067 maxnnz = 0; 1068 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1069 for (PetscInt r = 0; r < nr; r++) { 1070 for (PetscInt c = 0; c < nc; c++) { 1071 Mat sub = mats[r][c]; 1072 1073 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1074 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1075 if (sub) { 1076 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1077 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag, isDense; 1078 MatInfo info; 1079 1080 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1081 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1082 else { 1083 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1084 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1085 } 1086 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1087 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1088 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1089 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1090 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1091 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1092 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1093 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 1094 1095 if (chol) { 1096 if (r == c) { 1097 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1098 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1099 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1100 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1101 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1102 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1103 } else { 1104 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1105 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1106 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1107 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1108 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1109 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1110 } 1111 } else { 1112 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1113 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1114 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1115 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1116 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1117 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1118 } 1119 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1120 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1121 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1122 cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 1123 maxnnz = PetscMax(maxnnz, info.nz_used); 1124 } 1125 } 1126 } 1127 1128 /* Allocate total COO */ 1129 totnnz = cumnnz; 1130 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1131 PetscCall(PetscMalloc1(totnnz, &vals)); 1132 1133 /* Handle rows and column maps 1134 We directly map rows and use an SF for the columns */ 1135 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1136 PetscCall(MatNestGetISs(A, rows, cols)); 1137 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1138 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1139 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1140 else (void)maxnnz; 1141 1142 cumnnz = 0; 1143 for (PetscInt r = 0; r < nr; r++) { 1144 for (PetscInt c = 0; c < nc; c++) { 1145 Mat sub = mats[r][c]; 1146 const PetscInt *ridx = rows_idx[r]; 1147 const PetscInt *cidx = cols_idx[c]; 1148 PetscInt rst; 1149 PetscSF csf; 1150 PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 1151 PetscLayout cmap; 1152 1153 mumps->nest_vals_start[r * nc + c] = cumnnz; 1154 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1155 1156 /* Extract inner blocks if needed */ 1157 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1158 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1159 else { 1160 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1161 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1162 } 1163 swap = (PetscBool)(isTrans || isHTrans); 1164 1165 /* Get column layout to map off-process columns */ 1166 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1167 1168 /* Get row start to map on-process rows */ 1169 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1170 1171 /* Directly use the mumps datastructure and use C ordering for now */ 1172 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1173 1174 /* Swap the role of rows and columns indices for transposed blocks 1175 since we need values with global final ordering */ 1176 if (swap) { 1177 cidx = rows_idx[r]; 1178 ridx = cols_idx[c]; 1179 } 1180 1181 /* Communicate column indices 1182 This could have been done with a single SF but it would have complicated the code a lot. 1183 But since we do it only once, we pay the price of setting up an SF for each block */ 1184 if (PetscDefined(USE_64BIT_INDICES)) { 1185 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1186 } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 1187 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1188 PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1189 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1190 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1191 PetscCall(PetscSFDestroy(&csf)); 1192 1193 /* Import indices: use direct map for rows and mapped indices for columns */ 1194 if (swap) { 1195 for (PetscInt k = 0; k < mumps->nnz; k++) { 1196 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1197 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1198 } 1199 } else { 1200 for (PetscInt k = 0; k < mumps->nnz; k++) { 1201 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1202 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1203 } 1204 } 1205 1206 /* Import values to full COO */ 1207 PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1208 if (isHTrans) { /* conjugate the entries */ 1209 PetscScalar *v = vals + cumnnz; 1210 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]); 1211 } 1212 1213 /* Shift new starting point and sanity check */ 1214 cumnnz += mumps->nnz; 1215 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1216 1217 /* Free scratch memory */ 1218 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1219 PetscCall(PetscFree(mumps->val_alloc)); 1220 mumps->val = NULL; 1221 mumps->nnz = 0; 1222 } 1223 } 1224 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1225 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1226 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1227 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1228 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1229 mumps->nest_vals_start[nr * nc] = cumnnz; 1230 1231 /* Set pointers for final MUMPS data structure */ 1232 mumps->nest_vals = vals; 1233 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1234 mumps->val = vals; 1235 mumps->irn = irns; 1236 mumps->jcn = jcns; 1237 mumps->nnz = cumnnz; 1238 } else { 1239 PetscScalar *oval = mumps->nest_vals; 1240 for (PetscInt r = 0; r < nr; r++) { 1241 for (PetscInt c = 0; c < nc; c++) { 1242 PetscBool isTrans, isHTrans = PETSC_FALSE; 1243 Mat sub = mats[r][c]; 1244 PetscInt midx = r * nc + c; 1245 1246 if (!mumps->nest_convert_to_triples[midx]) continue; 1247 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1248 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1249 else { 1250 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1251 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1252 } 1253 mumps->val = oval + mumps->nest_vals_start[midx]; 1254 PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1255 if (isHTrans) { 1256 PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1257 for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1258 } 1259 } 1260 } 1261 mumps->val = oval; 1262 } 1263 PetscFunctionReturn(PETSC_SUCCESS); 1264 } 1265 1266 static PetscErrorCode MatDestroy_MUMPS(Mat A) 1267 { 1268 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1269 1270 PetscFunctionBegin; 1271 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1272 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1273 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1274 PetscCall(VecDestroy(&mumps->b_seq)); 1275 PetscCall(VecDestroy(&mumps->x_seq)); 1276 PetscCall(PetscFree(mumps->id.perm_in)); 1277 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1278 PetscCall(PetscFree(mumps->val_alloc)); 1279 PetscCall(PetscFree(mumps->info)); 1280 PetscCall(PetscFree(mumps->ICNTL_pre)); 1281 PetscCall(PetscFree(mumps->CNTL_pre)); 1282 PetscCall(MatMumpsResetSchur_Private(mumps)); 1283 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1284 mumps->id.job = JOB_END; 1285 PetscMUMPS_c(mumps); 1286 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1287 if (mumps->mumps_comm != MPI_COMM_NULL) { 1288 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1289 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1290 } 1291 } 1292 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1293 if (mumps->use_petsc_omp_support) { 1294 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1295 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1296 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1297 } 1298 #endif 1299 PetscCall(PetscFree(mumps->ia_alloc)); 1300 PetscCall(PetscFree(mumps->ja_alloc)); 1301 PetscCall(PetscFree(mumps->recvcount)); 1302 PetscCall(PetscFree(mumps->reqs)); 1303 PetscCall(PetscFree(mumps->irhs_loc)); 1304 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1305 PetscCall(PetscFree(mumps->nest_vals)); 1306 PetscCall(PetscFree(A->data)); 1307 1308 /* clear composed functions */ 1309 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1310 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1311 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1312 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1313 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1314 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1315 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1316 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1317 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1318 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1319 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1320 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1321 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1322 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1323 PetscFunctionReturn(PETSC_SUCCESS); 1324 } 1325 1326 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1327 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1328 { 1329 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1330 const PetscMPIInt ompsize = mumps->omp_comm_size; 1331 PetscInt i, m, M, rstart; 1332 1333 PetscFunctionBegin; 1334 PetscCall(MatGetSize(A, &M, NULL)); 1335 PetscCall(MatGetLocalSize(A, &m, NULL)); 1336 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1337 if (ompsize == 1) { 1338 if (!mumps->irhs_loc) { 1339 mumps->nloc_rhs = m; 1340 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1341 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1342 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1343 } 1344 mumps->id.rhs_loc = (MumpsScalar *)array; 1345 } else { 1346 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1347 const PetscInt *ranges; 1348 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1349 MPI_Group petsc_group, omp_group; 1350 PetscScalar *recvbuf = NULL; 1351 1352 if (mumps->is_omp_master) { 1353 /* Lazily initialize the omp stuff for distributed rhs */ 1354 if (!mumps->irhs_loc) { 1355 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1356 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1357 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1358 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1359 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1360 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1361 1362 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1363 mumps->nloc_rhs = 0; 1364 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1365 for (j = 0; j < ompsize; j++) { 1366 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1367 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1368 } 1369 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1370 for (j = k = 0; j < ompsize; j++) { 1371 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1372 } 1373 1374 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1375 PetscCallMPI(MPI_Group_free(&petsc_group)); 1376 PetscCallMPI(MPI_Group_free(&omp_group)); 1377 } 1378 1379 /* Realloc buffers when current nrhs is bigger than what we have met */ 1380 if (nrhs > mumps->max_nrhs) { 1381 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1382 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1383 mumps->max_nrhs = nrhs; 1384 } 1385 1386 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1387 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1388 mumps->rhs_disps[0] = 0; 1389 for (j = 1; j < ompsize; j++) { 1390 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1391 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1392 } 1393 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1394 } 1395 1396 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1397 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1398 1399 if (mumps->is_omp_master) { 1400 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1401 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1402 for (j = 0; j < ompsize; j++) { 1403 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1404 dst = dstbase; 1405 for (i = 0; i < nrhs; i++) { 1406 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1407 src += mumps->rhs_nrow[j]; 1408 dst += mumps->nloc_rhs; 1409 } 1410 dstbase += mumps->rhs_nrow[j]; 1411 } 1412 } 1413 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1414 } 1415 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1416 } 1417 mumps->id.nrhs = nrhs; 1418 mumps->id.nloc_rhs = mumps->nloc_rhs; 1419 mumps->id.lrhs_loc = mumps->nloc_rhs; 1420 mumps->id.irhs_loc = mumps->irhs_loc; 1421 PetscFunctionReturn(PETSC_SUCCESS); 1422 } 1423 1424 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1425 { 1426 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1427 const PetscScalar *rarray = NULL; 1428 PetscScalar *array; 1429 IS is_iden, is_petsc; 1430 PetscInt i; 1431 PetscBool second_solve = PETSC_FALSE; 1432 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1433 1434 PetscFunctionBegin; 1435 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1436 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1437 &cite1)); 1438 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1439 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1440 &cite2)); 1441 1442 if (A->factorerrortype) { 1443 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1444 PetscCall(VecSetInf(x)); 1445 PetscFunctionReturn(PETSC_SUCCESS); 1446 } 1447 1448 mumps->id.nrhs = 1; 1449 if (mumps->petsc_size > 1) { 1450 if (mumps->ICNTL20 == 10) { 1451 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1452 PetscCall(VecGetArrayRead(b, &rarray)); 1453 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1454 } else { 1455 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1456 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1457 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1458 if (!mumps->myid) { 1459 PetscCall(VecGetArray(mumps->b_seq, &array)); 1460 mumps->id.rhs = (MumpsScalar *)array; 1461 } 1462 } 1463 } else { /* petsc_size == 1 */ 1464 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1465 PetscCall(VecCopy(b, x)); 1466 PetscCall(VecGetArray(x, &array)); 1467 mumps->id.rhs = (MumpsScalar *)array; 1468 } 1469 1470 /* 1471 handle condensation step of Schur complement (if any) 1472 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1473 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1474 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1475 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1476 */ 1477 if (mumps->id.size_schur > 0) { 1478 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1479 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1480 second_solve = PETSC_TRUE; 1481 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1482 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1483 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1484 } 1485 /* solve phase */ 1486 mumps->id.job = JOB_SOLVE; 1487 PetscMUMPS_c(mumps); 1488 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1489 1490 /* handle expansion step of Schur complement (if any) */ 1491 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1492 else if (mumps->id.ICNTL(26) == 1) { 1493 PetscCall(MatMumpsSolveSchur_Private(A)); 1494 for (i = 0; i < mumps->id.size_schur; ++i) { 1495 #if !defined(PETSC_USE_COMPLEX) 1496 PetscScalar val = mumps->id.redrhs[i]; 1497 #else 1498 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1499 #endif 1500 array[mumps->id.listvar_schur[i] - 1] = val; 1501 } 1502 } 1503 1504 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1505 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1506 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1507 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1508 } 1509 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1510 PetscInt *isol2_loc = NULL; 1511 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1512 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1513 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1514 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1515 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1516 PetscCall(ISDestroy(&is_iden)); 1517 PetscCall(ISDestroy(&is_petsc)); 1518 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1519 } 1520 1521 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1522 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1523 } 1524 1525 if (mumps->petsc_size > 1) { 1526 if (mumps->ICNTL20 == 10) { 1527 PetscCall(VecRestoreArrayRead(b, &rarray)); 1528 } else if (!mumps->myid) { 1529 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1530 } 1531 } else PetscCall(VecRestoreArray(x, &array)); 1532 1533 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1534 PetscFunctionReturn(PETSC_SUCCESS); 1535 } 1536 1537 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1538 { 1539 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1540 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1541 1542 PetscFunctionBegin; 1543 mumps->id.ICNTL(9) = 0; 1544 PetscCall(MatSolve_MUMPS(A, b, x)); 1545 mumps->id.ICNTL(9) = value; 1546 PetscFunctionReturn(PETSC_SUCCESS); 1547 } 1548 1549 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1550 { 1551 Mat Bt = NULL; 1552 PetscBool denseX, denseB, flg, flgT; 1553 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1554 PetscInt i, nrhs, M; 1555 PetscScalar *array; 1556 const PetscScalar *rbray; 1557 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1558 PetscMUMPSInt *isol_loc, *isol_loc_save; 1559 PetscScalar *bray, *sol_loc, *sol_loc_save; 1560 IS is_to, is_from; 1561 PetscInt k, proc, j, m, myrstart; 1562 const PetscInt *rstart; 1563 Vec v_mpi, msol_loc; 1564 VecScatter scat_sol; 1565 Vec b_seq; 1566 VecScatter scat_rhs; 1567 PetscScalar *aa; 1568 PetscInt spnr, *ia, *ja; 1569 Mat_MPIAIJ *b = NULL; 1570 1571 PetscFunctionBegin; 1572 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1573 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1574 1575 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1576 if (denseB) { 1577 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1578 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1579 } else { /* sparse B */ 1580 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1581 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1582 if (flgT) { /* input B is transpose of actual RHS matrix, 1583 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1584 PetscCall(MatTransposeGetMat(B, &Bt)); 1585 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1586 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1587 } 1588 1589 PetscCall(MatGetSize(B, &M, &nrhs)); 1590 mumps->id.nrhs = nrhs; 1591 mumps->id.lrhs = M; 1592 mumps->id.rhs = NULL; 1593 1594 if (mumps->petsc_size == 1) { 1595 PetscScalar *aa; 1596 PetscInt spnr, *ia, *ja; 1597 PetscBool second_solve = PETSC_FALSE; 1598 1599 PetscCall(MatDenseGetArray(X, &array)); 1600 mumps->id.rhs = (MumpsScalar *)array; 1601 1602 if (denseB) { 1603 /* copy B to X */ 1604 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1605 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1606 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1607 } else { /* sparse B */ 1608 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1609 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1610 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1611 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1612 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1613 } 1614 /* handle condensation step of Schur complement (if any) */ 1615 if (mumps->id.size_schur > 0) { 1616 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1617 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1618 second_solve = PETSC_TRUE; 1619 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1620 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1621 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1622 } 1623 /* solve phase */ 1624 mumps->id.job = JOB_SOLVE; 1625 PetscMUMPS_c(mumps); 1626 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1627 1628 /* handle expansion step of Schur complement (if any) */ 1629 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1630 else if (mumps->id.ICNTL(26) == 1) { 1631 PetscCall(MatMumpsSolveSchur_Private(A)); 1632 for (j = 0; j < nrhs; ++j) 1633 for (i = 0; i < mumps->id.size_schur; ++i) { 1634 #if !defined(PETSC_USE_COMPLEX) 1635 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1636 #else 1637 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1638 #endif 1639 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1640 } 1641 } 1642 if (!denseB) { /* sparse B */ 1643 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1644 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1645 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1646 } 1647 PetscCall(MatDenseRestoreArray(X, &array)); 1648 PetscFunctionReturn(PETSC_SUCCESS); 1649 } 1650 1651 /* parallel case: MUMPS requires rhs B to be centralized on the host! */ 1652 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1653 1654 /* create msol_loc to hold mumps local solution */ 1655 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1656 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1657 1658 lsol_loc = mumps->id.lsol_loc; 1659 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1660 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1661 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1662 mumps->id.isol_loc = isol_loc; 1663 1664 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1665 1666 if (denseB) { 1667 if (mumps->ICNTL20 == 10) { 1668 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1669 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1670 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1671 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1672 PetscCall(MatGetLocalSize(B, &m, NULL)); 1673 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1674 } else { 1675 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1676 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1677 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1678 0, re-arrange B into desired order, which is a local operation. 1679 */ 1680 1681 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1682 /* wrap dense rhs matrix B into a vector v_mpi */ 1683 PetscCall(MatGetLocalSize(B, &m, NULL)); 1684 PetscCall(MatDenseGetArray(B, &bray)); 1685 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1686 PetscCall(MatDenseRestoreArray(B, &bray)); 1687 1688 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1689 if (!mumps->myid) { 1690 PetscInt *idx; 1691 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1692 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1693 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1694 k = 0; 1695 for (proc = 0; proc < mumps->petsc_size; proc++) { 1696 for (j = 0; j < nrhs; j++) { 1697 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1698 } 1699 } 1700 1701 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1702 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1703 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1704 } else { 1705 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1706 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1707 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1708 } 1709 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1710 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1711 PetscCall(ISDestroy(&is_to)); 1712 PetscCall(ISDestroy(&is_from)); 1713 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1714 1715 if (!mumps->myid) { /* define rhs on the host */ 1716 PetscCall(VecGetArray(b_seq, &bray)); 1717 mumps->id.rhs = (MumpsScalar *)bray; 1718 PetscCall(VecRestoreArray(b_seq, &bray)); 1719 } 1720 } 1721 } else { /* sparse B */ 1722 b = (Mat_MPIAIJ *)Bt->data; 1723 1724 /* wrap dense X into a vector v_mpi */ 1725 PetscCall(MatGetLocalSize(X, &m, NULL)); 1726 PetscCall(MatDenseGetArray(X, &bray)); 1727 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1728 PetscCall(MatDenseRestoreArray(X, &bray)); 1729 1730 if (!mumps->myid) { 1731 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1732 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1733 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1734 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1735 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1736 } else { 1737 mumps->id.irhs_ptr = NULL; 1738 mumps->id.irhs_sparse = NULL; 1739 mumps->id.nz_rhs = 0; 1740 mumps->id.rhs_sparse = NULL; 1741 } 1742 } 1743 1744 /* solve phase */ 1745 mumps->id.job = JOB_SOLVE; 1746 PetscMUMPS_c(mumps); 1747 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1748 1749 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1750 PetscCall(MatDenseGetArray(X, &array)); 1751 PetscCall(VecPlaceArray(v_mpi, array)); 1752 1753 /* create scatter scat_sol */ 1754 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1755 /* iidx: index for scatter mumps solution to petsc X */ 1756 1757 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1758 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1759 for (i = 0; i < lsol_loc; i++) { 1760 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1761 1762 for (proc = 0; proc < mumps->petsc_size; proc++) { 1763 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1764 myrstart = rstart[proc]; 1765 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1766 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1767 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1768 break; 1769 } 1770 } 1771 1772 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1773 } 1774 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1775 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1776 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1777 PetscCall(ISDestroy(&is_from)); 1778 PetscCall(ISDestroy(&is_to)); 1779 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1780 PetscCall(MatDenseRestoreArray(X, &array)); 1781 1782 /* free spaces */ 1783 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1784 mumps->id.isol_loc = isol_loc_save; 1785 1786 PetscCall(PetscFree2(sol_loc, isol_loc)); 1787 PetscCall(PetscFree(idxx)); 1788 PetscCall(VecDestroy(&msol_loc)); 1789 PetscCall(VecDestroy(&v_mpi)); 1790 if (!denseB) { 1791 if (!mumps->myid) { 1792 b = (Mat_MPIAIJ *)Bt->data; 1793 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1794 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1795 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1796 } 1797 } else { 1798 if (mumps->ICNTL20 == 0) { 1799 PetscCall(VecDestroy(&b_seq)); 1800 PetscCall(VecScatterDestroy(&scat_rhs)); 1801 } 1802 } 1803 PetscCall(VecScatterDestroy(&scat_sol)); 1804 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1805 PetscFunctionReturn(PETSC_SUCCESS); 1806 } 1807 1808 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1809 { 1810 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1811 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1812 1813 PetscFunctionBegin; 1814 mumps->id.ICNTL(9) = 0; 1815 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1816 mumps->id.ICNTL(9) = value; 1817 PetscFunctionReturn(PETSC_SUCCESS); 1818 } 1819 1820 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1821 { 1822 PetscBool flg; 1823 Mat B; 1824 1825 PetscFunctionBegin; 1826 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1827 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1828 1829 /* Create B=Bt^T that uses Bt's data structure */ 1830 PetscCall(MatCreateTranspose(Bt, &B)); 1831 1832 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1833 PetscCall(MatDestroy(&B)); 1834 PetscFunctionReturn(PETSC_SUCCESS); 1835 } 1836 1837 #if !defined(PETSC_USE_COMPLEX) 1838 /* 1839 input: 1840 F: numeric factor 1841 output: 1842 nneg: total number of negative pivots 1843 nzero: total number of zero pivots 1844 npos: (global dimension of F) - nneg - nzero 1845 */ 1846 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1847 { 1848 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1849 PetscMPIInt size; 1850 1851 PetscFunctionBegin; 1852 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1853 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1854 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1855 1856 if (nneg) *nneg = mumps->id.INFOG(12); 1857 if (nzero || npos) { 1858 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1859 if (nzero) *nzero = mumps->id.INFOG(28); 1860 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1861 } 1862 PetscFunctionReturn(PETSC_SUCCESS); 1863 } 1864 #endif 1865 1866 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1867 { 1868 PetscInt i, nreqs; 1869 PetscMUMPSInt *irn, *jcn; 1870 PetscMPIInt count; 1871 PetscInt64 totnnz, remain; 1872 const PetscInt osize = mumps->omp_comm_size; 1873 PetscScalar *val; 1874 1875 PetscFunctionBegin; 1876 if (osize > 1) { 1877 if (reuse == MAT_INITIAL_MATRIX) { 1878 /* master first gathers counts of nonzeros to receive */ 1879 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1880 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1881 1882 /* Then each computes number of send/recvs */ 1883 if (mumps->is_omp_master) { 1884 /* Start from 1 since self communication is not done in MPI */ 1885 nreqs = 0; 1886 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1887 } else { 1888 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1889 } 1890 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1891 1892 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1893 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1894 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1895 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1896 */ 1897 nreqs = 0; /* counter for actual send/recvs */ 1898 if (mumps->is_omp_master) { 1899 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1900 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1901 PetscCall(PetscMalloc1(totnnz, &val)); 1902 1903 /* Self communication */ 1904 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1905 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1906 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1907 1908 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1909 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1910 PetscCall(PetscFree(mumps->val_alloc)); 1911 mumps->nnz = totnnz; 1912 mumps->irn = irn; 1913 mumps->jcn = jcn; 1914 mumps->val = mumps->val_alloc = val; 1915 1916 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1917 jcn += mumps->recvcount[0]; 1918 val += mumps->recvcount[0]; 1919 1920 /* Remote communication */ 1921 for (i = 1; i < osize; i++) { 1922 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1923 remain = mumps->recvcount[i] - count; 1924 while (count > 0) { 1925 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1926 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1927 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1928 irn += count; 1929 jcn += count; 1930 val += count; 1931 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1932 remain -= count; 1933 } 1934 } 1935 } else { 1936 irn = mumps->irn; 1937 jcn = mumps->jcn; 1938 val = mumps->val; 1939 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1940 remain = mumps->nnz - count; 1941 while (count > 0) { 1942 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1943 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1944 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1945 irn += count; 1946 jcn += count; 1947 val += count; 1948 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1949 remain -= count; 1950 } 1951 } 1952 } else { 1953 nreqs = 0; 1954 if (mumps->is_omp_master) { 1955 val = mumps->val + mumps->recvcount[0]; 1956 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1957 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1958 remain = mumps->recvcount[i] - count; 1959 while (count > 0) { 1960 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1961 val += count; 1962 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1963 remain -= count; 1964 } 1965 } 1966 } else { 1967 val = mumps->val; 1968 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1969 remain = mumps->nnz - count; 1970 while (count > 0) { 1971 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1972 val += count; 1973 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1974 remain -= count; 1975 } 1976 } 1977 } 1978 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1979 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1980 } 1981 PetscFunctionReturn(PETSC_SUCCESS); 1982 } 1983 1984 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info) 1985 { 1986 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1987 PetscBool isMPIAIJ; 1988 1989 PetscFunctionBegin; 1990 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1991 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1992 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1993 PetscFunctionReturn(PETSC_SUCCESS); 1994 } 1995 1996 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1997 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1998 1999 /* numerical factorization phase */ 2000 mumps->id.job = JOB_FACTNUMERIC; 2001 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2002 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 2003 } else { 2004 mumps->id.a_loc = (MumpsScalar *)mumps->val; 2005 } 2006 PetscMUMPS_c(mumps); 2007 if (mumps->id.INFOG(1) < 0) { 2008 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2009 if (mumps->id.INFOG(1) == -10) { 2010 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2011 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2012 } else if (mumps->id.INFOG(1) == -13) { 2013 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2014 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2015 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2016 PetscCall(PetscInfo(F, "MUMPS error in numerical factorizatione: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2017 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2018 } else { 2019 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2020 F->factorerrortype = MAT_FACTOR_OTHER; 2021 } 2022 } 2023 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 2024 2025 F->assembled = PETSC_TRUE; 2026 2027 if (F->schur) { /* reset Schur status to unfactored */ 2028 #if defined(PETSC_HAVE_CUDA) 2029 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 2030 #endif 2031 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2032 mumps->id.ICNTL(19) = 2; 2033 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2034 } 2035 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2036 } 2037 2038 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2039 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2040 2041 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2042 if (mumps->petsc_size > 1) { 2043 PetscInt lsol_loc; 2044 PetscScalar *sol_loc; 2045 2046 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 2047 2048 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2049 if (mumps->x_seq) { 2050 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2051 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2052 PetscCall(VecDestroy(&mumps->x_seq)); 2053 } 2054 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2055 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2056 mumps->id.lsol_loc = lsol_loc; 2057 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2058 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2059 } 2060 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 2061 PetscFunctionReturn(PETSC_SUCCESS); 2062 } 2063 2064 /* Sets MUMPS options from the options database */ 2065 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2066 { 2067 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2068 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2069 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2070 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2071 MumpsScalar *arr; 2072 2073 PetscFunctionBegin; 2074 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2075 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2076 PetscInt nthreads = 0; 2077 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2078 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2079 2080 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2081 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2082 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2083 2084 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2085 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2086 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2087 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2088 if (mumps->use_petsc_omp_support) { 2089 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2090 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2091 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2092 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2093 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2094 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2095 #endif 2096 } else { 2097 mumps->omp_comm = PETSC_COMM_SELF; 2098 mumps->mumps_comm = mumps->petsc_comm; 2099 mumps->is_omp_master = PETSC_TRUE; 2100 } 2101 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2102 mumps->reqs = NULL; 2103 mumps->tag = 0; 2104 2105 if (mumps->mumps_comm != MPI_COMM_NULL) { 2106 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2107 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2108 MPI_Comm comm; 2109 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2110 mumps->mumps_comm = comm; 2111 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2112 } 2113 2114 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2115 mumps->id.job = JOB_INIT; 2116 mumps->id.par = 1; /* host participates factorizaton and solve */ 2117 mumps->id.sym = mumps->sym; 2118 2119 size = mumps->id.size_schur; 2120 arr = mumps->id.schur; 2121 listvar_schur = mumps->id.listvar_schur; 2122 PetscMUMPS_c(mumps); 2123 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2124 /* restore cached ICNTL and CNTL values */ 2125 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2126 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2127 PetscCall(PetscFree(mumps->ICNTL_pre)); 2128 PetscCall(PetscFree(mumps->CNTL_pre)); 2129 2130 if (schur) { 2131 mumps->id.size_schur = size; 2132 mumps->id.schur_lld = size; 2133 mumps->id.schur = arr; 2134 mumps->id.listvar_schur = listvar_schur; 2135 if (mumps->petsc_size > 1) { 2136 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2137 2138 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2139 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2140 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2141 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2142 } else { 2143 if (F->factortype == MAT_FACTOR_LU) { 2144 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2145 } else { 2146 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2147 } 2148 } 2149 mumps->id.ICNTL(26) = -1; 2150 } 2151 2152 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2153 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 2154 */ 2155 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2156 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2157 2158 mumps->scat_rhs = NULL; 2159 mumps->scat_sol = NULL; 2160 2161 /* set PETSc-MUMPS default options - override MUMPS default */ 2162 mumps->id.ICNTL(3) = 0; 2163 mumps->id.ICNTL(4) = 0; 2164 if (mumps->petsc_size == 1) { 2165 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2166 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2167 } else { 2168 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2169 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2170 } 2171 } 2172 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2173 if (flg) mumps->id.ICNTL(1) = icntl; 2174 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2175 if (flg) mumps->id.ICNTL(2) = icntl; 2176 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2177 if (flg) mumps->id.ICNTL(3) = icntl; 2178 2179 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2180 if (flg) mumps->id.ICNTL(4) = icntl; 2181 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2182 2183 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2184 if (flg) mumps->id.ICNTL(6) = icntl; 2185 2186 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2187 if (flg) { 2188 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2189 mumps->id.ICNTL(7) = icntl; 2190 } 2191 2192 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2193 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2194 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2195 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2196 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2197 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2198 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2199 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2200 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2201 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2202 if (flg) { 2203 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2204 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2205 } 2206 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2207 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2208 PetscCall(MatDestroy(&F->schur)); 2209 PetscCall(MatMumpsResetSchur_Private(mumps)); 2210 } 2211 2212 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2213 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2214 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2215 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2216 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2217 In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 2218 */ 2219 #if PETSC_PKG_MUMPS_VERSION_GE(5, 6, 2) && defined(PETSC_HAVE_MUMPS_AVOID_MPI_IN_PLACE) 2220 mumps->ICNTL20 = 10; 2221 #elif PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2222 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 2223 #else 2224 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 2225 #endif 2226 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2227 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2228 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2229 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2230 #endif 2231 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2232 2233 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2234 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2235 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2236 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2237 2238 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2239 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2240 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2241 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2242 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2243 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2244 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2245 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2246 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2247 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2248 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2249 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2250 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2251 2252 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2253 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2254 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2255 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2256 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2257 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2258 2259 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2260 2261 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2262 if (ninfo) { 2263 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2264 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2265 mumps->ninfo = ninfo; 2266 for (i = 0; i < ninfo; i++) { 2267 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2268 mumps->info[i] = info[i]; 2269 } 2270 } 2271 PetscOptionsEnd(); 2272 PetscFunctionReturn(PETSC_SUCCESS); 2273 } 2274 2275 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2276 { 2277 PetscFunctionBegin; 2278 if (mumps->id.INFOG(1) < 0) { 2279 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2280 if (mumps->id.INFOG(1) == -6) { 2281 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2282 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2283 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2284 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2285 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2286 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2287 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2288 } else { 2289 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2290 F->factorerrortype = MAT_FACTOR_OTHER; 2291 } 2292 } 2293 PetscFunctionReturn(PETSC_SUCCESS); 2294 } 2295 2296 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2297 { 2298 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2299 Vec b; 2300 const PetscInt M = A->rmap->N; 2301 2302 PetscFunctionBegin; 2303 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2304 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2305 PetscFunctionReturn(PETSC_SUCCESS); 2306 } 2307 2308 /* Set MUMPS options from the options database */ 2309 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2310 2311 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2312 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2313 2314 /* analysis phase */ 2315 mumps->id.job = JOB_FACTSYMBOLIC; 2316 mumps->id.n = M; 2317 switch (mumps->id.ICNTL(18)) { 2318 case 0: /* centralized assembled matrix input */ 2319 if (!mumps->myid) { 2320 mumps->id.nnz = mumps->nnz; 2321 mumps->id.irn = mumps->irn; 2322 mumps->id.jcn = mumps->jcn; 2323 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2324 if (r) { 2325 mumps->id.ICNTL(7) = 1; 2326 if (!mumps->myid) { 2327 const PetscInt *idx; 2328 PetscInt i; 2329 2330 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2331 PetscCall(ISGetIndices(r, &idx)); 2332 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2333 PetscCall(ISRestoreIndices(r, &idx)); 2334 } 2335 } 2336 } 2337 break; 2338 case 3: /* distributed assembled matrix input (size>1) */ 2339 mumps->id.nnz_loc = mumps->nnz; 2340 mumps->id.irn_loc = mumps->irn; 2341 mumps->id.jcn_loc = mumps->jcn; 2342 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2343 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2344 PetscCall(MatCreateVecs(A, NULL, &b)); 2345 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2346 PetscCall(VecDestroy(&b)); 2347 } 2348 break; 2349 } 2350 PetscMUMPS_c(mumps); 2351 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2352 2353 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2354 F->ops->solve = MatSolve_MUMPS; 2355 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2356 F->ops->matsolve = MatMatSolve_MUMPS; 2357 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2358 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2359 2360 mumps->matstruc = SAME_NONZERO_PATTERN; 2361 PetscFunctionReturn(PETSC_SUCCESS); 2362 } 2363 2364 /* Note the Petsc r and c permutations are ignored */ 2365 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2366 { 2367 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2368 Vec b; 2369 const PetscInt M = A->rmap->N; 2370 2371 PetscFunctionBegin; 2372 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2373 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2374 PetscFunctionReturn(PETSC_SUCCESS); 2375 } 2376 2377 /* Set MUMPS options from the options database */ 2378 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2379 2380 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2381 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2382 2383 /* analysis phase */ 2384 mumps->id.job = JOB_FACTSYMBOLIC; 2385 mumps->id.n = M; 2386 switch (mumps->id.ICNTL(18)) { 2387 case 0: /* centralized assembled matrix input */ 2388 if (!mumps->myid) { 2389 mumps->id.nnz = mumps->nnz; 2390 mumps->id.irn = mumps->irn; 2391 mumps->id.jcn = mumps->jcn; 2392 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2393 } 2394 break; 2395 case 3: /* distributed assembled matrix input (size>1) */ 2396 mumps->id.nnz_loc = mumps->nnz; 2397 mumps->id.irn_loc = mumps->irn; 2398 mumps->id.jcn_loc = mumps->jcn; 2399 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2400 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2401 PetscCall(MatCreateVecs(A, NULL, &b)); 2402 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2403 PetscCall(VecDestroy(&b)); 2404 } 2405 break; 2406 } 2407 PetscMUMPS_c(mumps); 2408 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2409 2410 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2411 F->ops->solve = MatSolve_MUMPS; 2412 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2413 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2414 2415 mumps->matstruc = SAME_NONZERO_PATTERN; 2416 PetscFunctionReturn(PETSC_SUCCESS); 2417 } 2418 2419 /* Note the Petsc r permutation and factor info are ignored */ 2420 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2421 { 2422 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2423 Vec b; 2424 const PetscInt M = A->rmap->N; 2425 2426 PetscFunctionBegin; 2427 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2428 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2429 PetscFunctionReturn(PETSC_SUCCESS); 2430 } 2431 2432 /* Set MUMPS options from the options database */ 2433 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2434 2435 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2436 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2437 2438 /* analysis phase */ 2439 mumps->id.job = JOB_FACTSYMBOLIC; 2440 mumps->id.n = M; 2441 switch (mumps->id.ICNTL(18)) { 2442 case 0: /* centralized assembled matrix input */ 2443 if (!mumps->myid) { 2444 mumps->id.nnz = mumps->nnz; 2445 mumps->id.irn = mumps->irn; 2446 mumps->id.jcn = mumps->jcn; 2447 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2448 } 2449 break; 2450 case 3: /* distributed assembled matrix input (size>1) */ 2451 mumps->id.nnz_loc = mumps->nnz; 2452 mumps->id.irn_loc = mumps->irn; 2453 mumps->id.jcn_loc = mumps->jcn; 2454 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2455 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2456 PetscCall(MatCreateVecs(A, NULL, &b)); 2457 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2458 PetscCall(VecDestroy(&b)); 2459 } 2460 break; 2461 } 2462 PetscMUMPS_c(mumps); 2463 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2464 2465 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2466 F->ops->solve = MatSolve_MUMPS; 2467 F->ops->solvetranspose = MatSolve_MUMPS; 2468 F->ops->matsolve = MatMatSolve_MUMPS; 2469 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2470 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2471 #if defined(PETSC_USE_COMPLEX) 2472 F->ops->getinertia = NULL; 2473 #else 2474 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2475 #endif 2476 2477 mumps->matstruc = SAME_NONZERO_PATTERN; 2478 PetscFunctionReturn(PETSC_SUCCESS); 2479 } 2480 2481 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2482 { 2483 PetscBool iascii; 2484 PetscViewerFormat format; 2485 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2486 2487 PetscFunctionBegin; 2488 /* check if matrix is mumps type */ 2489 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2490 2491 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2492 if (iascii) { 2493 PetscCall(PetscViewerGetFormat(viewer, &format)); 2494 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2495 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2496 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2497 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2498 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2499 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2500 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2501 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2502 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2503 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2504 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2505 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2506 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2507 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2508 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2509 if (mumps->id.ICNTL(11) > 0) { 2510 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2511 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2512 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2513 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2514 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2515 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2516 } 2517 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2518 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2519 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2520 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2521 /* ICNTL(15-17) not used */ 2522 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2523 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2524 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2525 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2526 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2527 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2528 2529 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2530 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2531 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2532 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2533 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2534 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2535 2536 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2537 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2538 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2539 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2540 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2541 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2542 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2543 2544 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2545 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2546 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2547 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2548 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2549 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2550 2551 /* information local to each processor */ 2552 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2553 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2554 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2555 PetscCall(PetscViewerFlush(viewer)); 2556 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2557 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2558 PetscCall(PetscViewerFlush(viewer)); 2559 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2560 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2561 PetscCall(PetscViewerFlush(viewer)); 2562 2563 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2564 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2565 PetscCall(PetscViewerFlush(viewer)); 2566 2567 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2568 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2569 PetscCall(PetscViewerFlush(viewer)); 2570 2571 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2572 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2573 PetscCall(PetscViewerFlush(viewer)); 2574 2575 if (mumps->ninfo && mumps->ninfo <= 80) { 2576 PetscInt i; 2577 for (i = 0; i < mumps->ninfo; i++) { 2578 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2579 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2580 PetscCall(PetscViewerFlush(viewer)); 2581 } 2582 } 2583 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2584 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : "")); 2585 2586 if (mumps->myid == 0) { /* information from the host */ 2587 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2588 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2589 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2590 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2591 2592 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2593 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2594 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2595 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2596 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2597 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2598 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2599 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2600 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2601 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2602 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2603 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2604 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2605 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2606 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2607 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2608 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2609 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 2610 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2611 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2612 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2613 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2614 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2615 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2616 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2617 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2618 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2619 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2620 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2621 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2622 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2623 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2624 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2625 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2626 } 2627 } 2628 } 2629 PetscFunctionReturn(PETSC_SUCCESS); 2630 } 2631 2632 static PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2633 { 2634 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2635 2636 PetscFunctionBegin; 2637 info->block_size = 1.0; 2638 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2639 info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2640 info->nz_unneeded = 0.0; 2641 info->assemblies = 0.0; 2642 info->mallocs = 0.0; 2643 info->memory = 0.0; 2644 info->fill_ratio_given = 0; 2645 info->fill_ratio_needed = 0; 2646 info->factor_mallocs = 0; 2647 PetscFunctionReturn(PETSC_SUCCESS); 2648 } 2649 2650 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2651 { 2652 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2653 const PetscScalar *arr; 2654 const PetscInt *idxs; 2655 PetscInt size, i; 2656 2657 PetscFunctionBegin; 2658 PetscCall(ISGetLocalSize(is, &size)); 2659 /* Schur complement matrix */ 2660 PetscCall(MatDestroy(&F->schur)); 2661 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2662 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2663 mumps->id.schur = (MumpsScalar *)arr; 2664 mumps->id.size_schur = size; 2665 mumps->id.schur_lld = size; 2666 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2667 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2668 2669 /* MUMPS expects Fortran style indices */ 2670 PetscCall(PetscFree(mumps->id.listvar_schur)); 2671 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2672 PetscCall(ISGetIndices(is, &idxs)); 2673 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2674 PetscCall(ISRestoreIndices(is, &idxs)); 2675 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2676 mumps->id.ICNTL(26) = -1; 2677 PetscFunctionReturn(PETSC_SUCCESS); 2678 } 2679 2680 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2681 { 2682 Mat St; 2683 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2684 PetscScalar *array; 2685 2686 PetscFunctionBegin; 2687 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it"); 2688 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2689 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2690 PetscCall(MatSetType(St, MATDENSE)); 2691 PetscCall(MatSetUp(St)); 2692 PetscCall(MatDenseGetArray(St, &array)); 2693 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2694 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2695 PetscInt i, j, N = mumps->id.size_schur; 2696 for (i = 0; i < N; i++) { 2697 for (j = 0; j < N; j++) { 2698 #if !defined(PETSC_USE_COMPLEX) 2699 PetscScalar val = mumps->id.schur[i * N + j]; 2700 #else 2701 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2702 #endif 2703 array[j * N + i] = val; 2704 } 2705 } 2706 } else { /* stored by columns */ 2707 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2708 } 2709 } else { /* either full or lower-triangular (not packed) */ 2710 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2711 PetscInt i, j, N = mumps->id.size_schur; 2712 for (i = 0; i < N; i++) { 2713 for (j = i; j < N; j++) { 2714 #if !defined(PETSC_USE_COMPLEX) 2715 PetscScalar val = mumps->id.schur[i * N + j]; 2716 #else 2717 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2718 #endif 2719 array[i * N + j] = array[j * N + i] = val; 2720 } 2721 } 2722 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2723 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2724 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2725 PetscInt i, j, N = mumps->id.size_schur; 2726 for (i = 0; i < N; i++) { 2727 for (j = 0; j < i + 1; j++) { 2728 #if !defined(PETSC_USE_COMPLEX) 2729 PetscScalar val = mumps->id.schur[i * N + j]; 2730 #else 2731 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2732 #endif 2733 array[i * N + j] = array[j * N + i] = val; 2734 } 2735 } 2736 } 2737 } 2738 PetscCall(MatDenseRestoreArray(St, &array)); 2739 *S = St; 2740 PetscFunctionReturn(PETSC_SUCCESS); 2741 } 2742 2743 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2744 { 2745 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2746 2747 PetscFunctionBegin; 2748 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2749 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2750 for (i = 0; i < nICNTL_pre; ++i) 2751 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2752 if (i == nICNTL_pre) { /* not already cached */ 2753 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2754 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2755 mumps->ICNTL_pre[0]++; 2756 } 2757 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2758 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2759 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2760 PetscFunctionReturn(PETSC_SUCCESS); 2761 } 2762 2763 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2764 { 2765 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2766 2767 PetscFunctionBegin; 2768 if (mumps->id.job == JOB_NULL) { 2769 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2770 *ival = 0; 2771 for (i = 0; i < nICNTL_pre; ++i) { 2772 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2773 } 2774 } else *ival = mumps->id.ICNTL(icntl); 2775 PetscFunctionReturn(PETSC_SUCCESS); 2776 } 2777 2778 /*@ 2779 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2780 2781 Logically Collective 2782 2783 Input Parameters: 2784 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2785 . icntl - index of MUMPS parameter array ICNTL() 2786 - ival - value of MUMPS ICNTL(icntl) 2787 2788 Options Database Key: 2789 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2790 2791 Level: beginner 2792 2793 References: 2794 . * - MUMPS Users' Guide 2795 2796 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2797 @*/ 2798 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2799 { 2800 PetscFunctionBegin; 2801 PetscValidType(F, 1); 2802 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2803 PetscValidLogicalCollectiveInt(F, icntl, 2); 2804 PetscValidLogicalCollectiveInt(F, ival, 3); 2805 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2806 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2807 PetscFunctionReturn(PETSC_SUCCESS); 2808 } 2809 2810 /*@ 2811 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2812 2813 Logically Collective 2814 2815 Input Parameters: 2816 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2817 - icntl - index of MUMPS parameter array ICNTL() 2818 2819 Output Parameter: 2820 . ival - value of MUMPS ICNTL(icntl) 2821 2822 Level: beginner 2823 2824 References: 2825 . * - MUMPS Users' Guide 2826 2827 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2828 @*/ 2829 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2830 { 2831 PetscFunctionBegin; 2832 PetscValidType(F, 1); 2833 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2834 PetscValidLogicalCollectiveInt(F, icntl, 2); 2835 PetscAssertPointer(ival, 3); 2836 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2837 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2838 PetscFunctionReturn(PETSC_SUCCESS); 2839 } 2840 2841 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2842 { 2843 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2844 2845 PetscFunctionBegin; 2846 if (mumps->id.job == JOB_NULL) { 2847 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2848 for (i = 0; i < nCNTL_pre; ++i) 2849 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2850 if (i == nCNTL_pre) { 2851 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2852 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2853 mumps->CNTL_pre[0]++; 2854 } 2855 mumps->CNTL_pre[1 + 2 * i] = icntl; 2856 mumps->CNTL_pre[2 + 2 * i] = val; 2857 } else mumps->id.CNTL(icntl) = val; 2858 PetscFunctionReturn(PETSC_SUCCESS); 2859 } 2860 2861 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2862 { 2863 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2864 2865 PetscFunctionBegin; 2866 if (mumps->id.job == JOB_NULL) { 2867 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2868 *val = 0.0; 2869 for (i = 0; i < nCNTL_pre; ++i) { 2870 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2871 } 2872 } else *val = mumps->id.CNTL(icntl); 2873 PetscFunctionReturn(PETSC_SUCCESS); 2874 } 2875 2876 /*@ 2877 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2878 2879 Logically Collective 2880 2881 Input Parameters: 2882 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2883 . icntl - index of MUMPS parameter array CNTL() 2884 - val - value of MUMPS CNTL(icntl) 2885 2886 Options Database Key: 2887 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2888 2889 Level: beginner 2890 2891 References: 2892 . * - MUMPS Users' Guide 2893 2894 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2895 @*/ 2896 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2897 { 2898 PetscFunctionBegin; 2899 PetscValidType(F, 1); 2900 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2901 PetscValidLogicalCollectiveInt(F, icntl, 2); 2902 PetscValidLogicalCollectiveReal(F, val, 3); 2903 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2904 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2905 PetscFunctionReturn(PETSC_SUCCESS); 2906 } 2907 2908 /*@ 2909 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2910 2911 Logically Collective 2912 2913 Input Parameters: 2914 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2915 - icntl - index of MUMPS parameter array CNTL() 2916 2917 Output Parameter: 2918 . val - value of MUMPS CNTL(icntl) 2919 2920 Level: beginner 2921 2922 References: 2923 . * - MUMPS Users' Guide 2924 2925 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2926 @*/ 2927 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2928 { 2929 PetscFunctionBegin; 2930 PetscValidType(F, 1); 2931 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2932 PetscValidLogicalCollectiveInt(F, icntl, 2); 2933 PetscAssertPointer(val, 3); 2934 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2935 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2936 PetscFunctionReturn(PETSC_SUCCESS); 2937 } 2938 2939 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2940 { 2941 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2942 2943 PetscFunctionBegin; 2944 *info = mumps->id.INFO(icntl); 2945 PetscFunctionReturn(PETSC_SUCCESS); 2946 } 2947 2948 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2949 { 2950 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2951 2952 PetscFunctionBegin; 2953 *infog = mumps->id.INFOG(icntl); 2954 PetscFunctionReturn(PETSC_SUCCESS); 2955 } 2956 2957 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2958 { 2959 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2960 2961 PetscFunctionBegin; 2962 *rinfo = mumps->id.RINFO(icntl); 2963 PetscFunctionReturn(PETSC_SUCCESS); 2964 } 2965 2966 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2967 { 2968 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2969 2970 PetscFunctionBegin; 2971 *rinfog = mumps->id.RINFOG(icntl); 2972 PetscFunctionReturn(PETSC_SUCCESS); 2973 } 2974 2975 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2976 { 2977 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2978 2979 PetscFunctionBegin; 2980 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2981 *size = 0; 2982 *array = NULL; 2983 if (!mumps->myid) { 2984 *size = mumps->id.INFOG(28); 2985 PetscCall(PetscMalloc1(*size, array)); 2986 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2987 } 2988 PetscFunctionReturn(PETSC_SUCCESS); 2989 } 2990 2991 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2992 { 2993 Mat Bt = NULL, Btseq = NULL; 2994 PetscBool flg; 2995 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2996 PetscScalar *aa; 2997 PetscInt spnr, *ia, *ja, M, nrhs; 2998 2999 PetscFunctionBegin; 3000 PetscAssertPointer(spRHS, 2); 3001 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 3002 if (flg) { 3003 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 3004 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 3005 3006 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 3007 3008 if (mumps->petsc_size > 1) { 3009 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 3010 Btseq = b->A; 3011 } else { 3012 Btseq = Bt; 3013 } 3014 3015 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 3016 mumps->id.nrhs = nrhs; 3017 mumps->id.lrhs = M; 3018 mumps->id.rhs = NULL; 3019 3020 if (!mumps->myid) { 3021 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 3022 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3023 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3024 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3025 mumps->id.rhs_sparse = (MumpsScalar *)aa; 3026 } else { 3027 mumps->id.irhs_ptr = NULL; 3028 mumps->id.irhs_sparse = NULL; 3029 mumps->id.nz_rhs = 0; 3030 mumps->id.rhs_sparse = NULL; 3031 } 3032 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3033 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3034 3035 /* solve phase */ 3036 mumps->id.job = JOB_SOLVE; 3037 PetscMUMPS_c(mumps); 3038 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 3039 3040 if (!mumps->myid) { 3041 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3042 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3043 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3044 } 3045 PetscFunctionReturn(PETSC_SUCCESS); 3046 } 3047 3048 /*@ 3049 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 3050 3051 Logically Collective 3052 3053 Input Parameter: 3054 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3055 3056 Output Parameter: 3057 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3058 3059 Level: beginner 3060 3061 References: 3062 . * - MUMPS Users' Guide 3063 3064 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3065 @*/ 3066 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3067 { 3068 PetscFunctionBegin; 3069 PetscValidType(F, 1); 3070 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3071 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3072 PetscFunctionReturn(PETSC_SUCCESS); 3073 } 3074 3075 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3076 { 3077 Mat spRHS; 3078 3079 PetscFunctionBegin; 3080 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3081 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3082 PetscCall(MatDestroy(&spRHS)); 3083 PetscFunctionReturn(PETSC_SUCCESS); 3084 } 3085 3086 /*@ 3087 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 3088 3089 Logically Collective 3090 3091 Input Parameter: 3092 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3093 3094 Output Parameter: 3095 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3096 3097 Level: beginner 3098 3099 References: 3100 . * - MUMPS Users' Guide 3101 3102 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3103 @*/ 3104 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3105 { 3106 PetscBool flg; 3107 3108 PetscFunctionBegin; 3109 PetscValidType(F, 1); 3110 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3111 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3112 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3113 3114 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3115 PetscFunctionReturn(PETSC_SUCCESS); 3116 } 3117 3118 /*@ 3119 MatMumpsGetInfo - Get MUMPS parameter INFO() 3120 3121 Logically Collective 3122 3123 Input Parameters: 3124 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3125 - icntl - index of MUMPS parameter array INFO() 3126 3127 Output Parameter: 3128 . ival - value of MUMPS INFO(icntl) 3129 3130 Level: beginner 3131 3132 References: 3133 . * - MUMPS Users' Guide 3134 3135 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3136 @*/ 3137 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3138 { 3139 PetscFunctionBegin; 3140 PetscValidType(F, 1); 3141 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3142 PetscAssertPointer(ival, 3); 3143 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3144 PetscFunctionReturn(PETSC_SUCCESS); 3145 } 3146 3147 /*@ 3148 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3149 3150 Logically Collective 3151 3152 Input Parameters: 3153 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3154 - icntl - index of MUMPS parameter array INFOG() 3155 3156 Output Parameter: 3157 . ival - value of MUMPS INFOG(icntl) 3158 3159 Level: beginner 3160 3161 References: 3162 . * - MUMPS Users' Guide 3163 3164 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3165 @*/ 3166 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3167 { 3168 PetscFunctionBegin; 3169 PetscValidType(F, 1); 3170 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3171 PetscAssertPointer(ival, 3); 3172 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3173 PetscFunctionReturn(PETSC_SUCCESS); 3174 } 3175 3176 /*@ 3177 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3178 3179 Logically Collective 3180 3181 Input Parameters: 3182 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3183 - icntl - index of MUMPS parameter array RINFO() 3184 3185 Output Parameter: 3186 . val - value of MUMPS RINFO(icntl) 3187 3188 Level: beginner 3189 3190 References: 3191 . * - MUMPS Users' Guide 3192 3193 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3194 @*/ 3195 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3196 { 3197 PetscFunctionBegin; 3198 PetscValidType(F, 1); 3199 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3200 PetscAssertPointer(val, 3); 3201 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3202 PetscFunctionReturn(PETSC_SUCCESS); 3203 } 3204 3205 /*@ 3206 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3207 3208 Logically Collective 3209 3210 Input Parameters: 3211 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3212 - icntl - index of MUMPS parameter array RINFOG() 3213 3214 Output Parameter: 3215 . val - value of MUMPS RINFOG(icntl) 3216 3217 Level: beginner 3218 3219 References: 3220 . * - MUMPS Users' Guide 3221 3222 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3223 @*/ 3224 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3225 { 3226 PetscFunctionBegin; 3227 PetscValidType(F, 1); 3228 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3229 PetscAssertPointer(val, 3); 3230 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3231 PetscFunctionReturn(PETSC_SUCCESS); 3232 } 3233 3234 /*@ 3235 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3236 3237 Logically Collective 3238 3239 Input Parameter: 3240 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3241 3242 Output Parameters: 3243 + size - local size of the array. The size of the array is non-zero only on the host. 3244 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3245 for freeing this array. 3246 3247 Level: beginner 3248 3249 References: 3250 . * - MUMPS Users' Guide 3251 3252 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3253 @*/ 3254 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3255 { 3256 PetscFunctionBegin; 3257 PetscValidType(F, 1); 3258 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3259 PetscAssertPointer(size, 2); 3260 PetscAssertPointer(array, 3); 3261 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3262 PetscFunctionReturn(PETSC_SUCCESS); 3263 } 3264 3265 /*MC 3266 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3267 distributed and sequential matrices via the external package MUMPS. 3268 3269 Works with `MATAIJ` and `MATSBAIJ` matrices 3270 3271 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3272 3273 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3274 See details below. 3275 3276 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3277 3278 Options Database Keys: 3279 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3280 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 3281 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3282 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3283 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3284 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3285 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3286 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3287 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3288 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3289 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3290 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3291 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3292 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3293 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3294 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3295 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3296 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3297 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3298 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3299 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3300 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 3301 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3302 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3303 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3304 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3305 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3306 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3307 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3308 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3309 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3310 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3311 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3312 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3313 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3314 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3315 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3316 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3317 3318 Level: beginner 3319 3320 Notes: 3321 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 3322 error if the matrix is Hermitian. 3323 3324 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3325 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3326 3327 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3328 the failure with 3329 .vb 3330 KSPGetPC(ksp,&pc); 3331 PCFactorGetMatrix(pc,&mat); 3332 MatMumpsGetInfo(mat,....); 3333 MatMumpsGetInfog(mat,....); etc. 3334 .ve 3335 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3336 3337 MUMPS provides 64-bit integer support in two build modes: 3338 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3339 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3340 3341 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3342 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3343 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3344 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3345 3346 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3347 3348 Two modes to run MUMPS/PETSc with OpenMP 3349 .vb 3350 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3351 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3352 .ve 3353 3354 .vb 3355 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3356 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3357 .ve 3358 3359 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3360 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3361 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3362 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3363 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 3364 3365 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3366 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3367 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3368 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 3369 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3370 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 3371 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3372 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3373 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3374 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3375 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3376 examine the mapping result. 3377 3378 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3379 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3380 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3381 3382 References: 3383 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3384 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 3385 3386 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3387 M*/ 3388 3389 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3390 { 3391 PetscFunctionBegin; 3392 *type = MATSOLVERMUMPS; 3393 PetscFunctionReturn(PETSC_SUCCESS); 3394 } 3395 3396 /* MatGetFactor for Seq and MPI AIJ matrices */ 3397 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3398 { 3399 Mat B; 3400 Mat_MUMPS *mumps; 3401 PetscBool isSeqAIJ, isDiag, isDense; 3402 PetscMPIInt size; 3403 3404 PetscFunctionBegin; 3405 #if defined(PETSC_USE_COMPLEX) 3406 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3407 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3408 *F = NULL; 3409 PetscFunctionReturn(PETSC_SUCCESS); 3410 } 3411 #endif 3412 /* Create the factorization matrix */ 3413 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3414 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3415 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3416 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3417 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3418 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3419 PetscCall(MatSetUp(B)); 3420 3421 PetscCall(PetscNew(&mumps)); 3422 3423 B->ops->view = MatView_MUMPS; 3424 B->ops->getinfo = MatGetInfo_MUMPS; 3425 3426 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3427 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3428 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3429 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3430 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3431 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3432 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3433 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3434 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3435 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3436 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3437 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3438 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3439 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3440 3441 if (ftype == MAT_FACTOR_LU) { 3442 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3443 B->factortype = MAT_FACTOR_LU; 3444 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3445 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3446 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3447 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3448 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3449 mumps->sym = 0; 3450 } else { 3451 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3452 B->factortype = MAT_FACTOR_CHOLESKY; 3453 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3454 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3455 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3456 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3457 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3458 #if defined(PETSC_USE_COMPLEX) 3459 mumps->sym = 2; 3460 #else 3461 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3462 else mumps->sym = 2; 3463 #endif 3464 } 3465 3466 /* set solvertype */ 3467 PetscCall(PetscFree(B->solvertype)); 3468 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3469 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3470 if (size == 1) { 3471 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3472 B->canuseordering = PETSC_TRUE; 3473 } 3474 B->ops->destroy = MatDestroy_MUMPS; 3475 B->data = (void *)mumps; 3476 3477 *F = B; 3478 mumps->id.job = JOB_NULL; 3479 mumps->ICNTL_pre = NULL; 3480 mumps->CNTL_pre = NULL; 3481 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3482 PetscFunctionReturn(PETSC_SUCCESS); 3483 } 3484 3485 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3486 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3487 { 3488 Mat B; 3489 Mat_MUMPS *mumps; 3490 PetscBool isSeqSBAIJ; 3491 PetscMPIInt size; 3492 3493 PetscFunctionBegin; 3494 #if defined(PETSC_USE_COMPLEX) 3495 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3496 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3497 *F = NULL; 3498 PetscFunctionReturn(PETSC_SUCCESS); 3499 } 3500 #endif 3501 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3502 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3503 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3504 PetscCall(MatSetUp(B)); 3505 3506 PetscCall(PetscNew(&mumps)); 3507 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3508 if (isSeqSBAIJ) { 3509 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3510 } else { 3511 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3512 } 3513 3514 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3515 B->ops->view = MatView_MUMPS; 3516 B->ops->getinfo = MatGetInfo_MUMPS; 3517 3518 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3519 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3520 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3521 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3522 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3523 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3524 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3525 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3526 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3527 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3528 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3529 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3530 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3531 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3532 3533 B->factortype = MAT_FACTOR_CHOLESKY; 3534 #if defined(PETSC_USE_COMPLEX) 3535 mumps->sym = 2; 3536 #else 3537 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3538 else mumps->sym = 2; 3539 #endif 3540 3541 /* set solvertype */ 3542 PetscCall(PetscFree(B->solvertype)); 3543 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3544 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3545 if (size == 1) { 3546 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3547 B->canuseordering = PETSC_TRUE; 3548 } 3549 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3550 B->ops->destroy = MatDestroy_MUMPS; 3551 B->data = (void *)mumps; 3552 3553 *F = B; 3554 mumps->id.job = JOB_NULL; 3555 mumps->ICNTL_pre = NULL; 3556 mumps->CNTL_pre = NULL; 3557 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3558 PetscFunctionReturn(PETSC_SUCCESS); 3559 } 3560 3561 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3562 { 3563 Mat B; 3564 Mat_MUMPS *mumps; 3565 PetscBool isSeqBAIJ; 3566 PetscMPIInt size; 3567 3568 PetscFunctionBegin; 3569 /* Create the factorization matrix */ 3570 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3571 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3572 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3573 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3574 PetscCall(MatSetUp(B)); 3575 3576 PetscCall(PetscNew(&mumps)); 3577 if (ftype == MAT_FACTOR_LU) { 3578 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3579 B->factortype = MAT_FACTOR_LU; 3580 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3581 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3582 mumps->sym = 0; 3583 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3584 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3585 3586 B->ops->view = MatView_MUMPS; 3587 B->ops->getinfo = MatGetInfo_MUMPS; 3588 3589 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3590 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3591 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3592 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3593 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3594 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3595 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3596 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3597 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3598 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3599 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3600 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3601 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3602 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3603 3604 /* set solvertype */ 3605 PetscCall(PetscFree(B->solvertype)); 3606 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3607 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3608 if (size == 1) { 3609 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3610 B->canuseordering = PETSC_TRUE; 3611 } 3612 B->ops->destroy = MatDestroy_MUMPS; 3613 B->data = (void *)mumps; 3614 3615 *F = B; 3616 mumps->id.job = JOB_NULL; 3617 mumps->ICNTL_pre = NULL; 3618 mumps->CNTL_pre = NULL; 3619 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3620 PetscFunctionReturn(PETSC_SUCCESS); 3621 } 3622 3623 /* MatGetFactor for Seq and MPI SELL matrices */ 3624 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3625 { 3626 Mat B; 3627 Mat_MUMPS *mumps; 3628 PetscBool isSeqSELL; 3629 PetscMPIInt size; 3630 3631 PetscFunctionBegin; 3632 /* Create the factorization matrix */ 3633 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3634 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3635 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3636 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3637 PetscCall(MatSetUp(B)); 3638 3639 PetscCall(PetscNew(&mumps)); 3640 3641 B->ops->view = MatView_MUMPS; 3642 B->ops->getinfo = MatGetInfo_MUMPS; 3643 3644 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3645 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3646 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3647 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3648 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3649 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3650 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3651 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3652 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3653 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3654 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3655 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3656 3657 if (ftype == MAT_FACTOR_LU) { 3658 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3659 B->factortype = MAT_FACTOR_LU; 3660 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3661 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3662 mumps->sym = 0; 3663 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3664 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3665 3666 /* set solvertype */ 3667 PetscCall(PetscFree(B->solvertype)); 3668 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3669 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3670 if (size == 1) { 3671 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3672 B->canuseordering = PETSC_TRUE; 3673 } 3674 B->ops->destroy = MatDestroy_MUMPS; 3675 B->data = (void *)mumps; 3676 3677 *F = B; 3678 mumps->id.job = JOB_NULL; 3679 mumps->ICNTL_pre = NULL; 3680 mumps->CNTL_pre = NULL; 3681 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3682 PetscFunctionReturn(PETSC_SUCCESS); 3683 } 3684 3685 /* MatGetFactor for MATNEST matrices */ 3686 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3687 { 3688 Mat B, **mats; 3689 Mat_MUMPS *mumps; 3690 PetscInt nr, nc; 3691 PetscMPIInt size; 3692 PetscBool flg = PETSC_TRUE; 3693 3694 PetscFunctionBegin; 3695 #if defined(PETSC_USE_COMPLEX) 3696 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3697 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3698 *F = NULL; 3699 PetscFunctionReturn(PETSC_SUCCESS); 3700 } 3701 #endif 3702 3703 /* Return if some condition is not satisfied */ 3704 *F = NULL; 3705 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3706 if (ftype == MAT_FACTOR_CHOLESKY) { 3707 IS *rows, *cols; 3708 PetscInt *m, *M; 3709 3710 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3711 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3712 PetscCall(MatNestGetISs(A, rows, cols)); 3713 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3714 if (!flg) { 3715 PetscCall(PetscFree2(rows, cols)); 3716 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 3717 PetscFunctionReturn(PETSC_SUCCESS); 3718 } 3719 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3720 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3721 for (PetscInt r = 0; flg && r < nr; r++) 3722 for (PetscInt k = r + 1; flg && k < nr; k++) 3723 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3724 PetscCall(PetscFree2(m, M)); 3725 PetscCall(PetscFree2(rows, cols)); 3726 if (!flg) { 3727 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 3728 PetscFunctionReturn(PETSC_SUCCESS); 3729 } 3730 } 3731 3732 for (PetscInt r = 0; r < nr; r++) { 3733 for (PetscInt c = 0; c < nc; c++) { 3734 Mat sub = mats[r][c]; 3735 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag, isDense; 3736 3737 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3738 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 3739 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 3740 else { 3741 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 3742 if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 3743 } 3744 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3745 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3746 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3747 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3748 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3749 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3750 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 3751 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3752 if (ftype == MAT_FACTOR_CHOLESKY) { 3753 if (r == c) { 3754 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) { 3755 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3756 flg = PETSC_FALSE; 3757 } 3758 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 3759 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3760 flg = PETSC_FALSE; 3761 } 3762 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 3763 PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3764 flg = PETSC_FALSE; 3765 } 3766 } 3767 } 3768 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3769 3770 /* Create the factorization matrix */ 3771 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3772 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3773 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3774 PetscCall(MatSetUp(B)); 3775 3776 PetscCall(PetscNew(&mumps)); 3777 3778 B->ops->view = MatView_MUMPS; 3779 B->ops->getinfo = MatGetInfo_MUMPS; 3780 3781 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3782 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3783 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3784 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3785 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3786 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3787 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3788 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3789 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3790 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3791 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3792 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3793 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3794 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3795 3796 if (ftype == MAT_FACTOR_LU) { 3797 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3798 B->factortype = MAT_FACTOR_LU; 3799 mumps->sym = 0; 3800 } else { 3801 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3802 B->factortype = MAT_FACTOR_CHOLESKY; 3803 #if defined(PETSC_USE_COMPLEX) 3804 mumps->sym = 2; 3805 #else 3806 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3807 else mumps->sym = 2; 3808 #endif 3809 } 3810 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3811 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3812 3813 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3814 if (size == 1) { 3815 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3816 B->canuseordering = PETSC_TRUE; 3817 } 3818 3819 /* set solvertype */ 3820 PetscCall(PetscFree(B->solvertype)); 3821 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3822 B->ops->destroy = MatDestroy_MUMPS; 3823 B->data = (void *)mumps; 3824 3825 *F = B; 3826 mumps->id.job = JOB_NULL; 3827 mumps->ICNTL_pre = NULL; 3828 mumps->CNTL_pre = NULL; 3829 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3830 PetscFunctionReturn(PETSC_SUCCESS); 3831 } 3832 3833 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3834 { 3835 PetscFunctionBegin; 3836 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3837 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3838 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3839 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3840 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3841 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3842 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3843 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3844 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3845 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3846 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3847 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3848 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3849 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3850 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3851 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3852 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3853 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3854 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 3855 PetscFunctionReturn(PETSC_SUCCESS); 3856 } 3857