/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

#define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <cmumps_c.h>
  #else
    #include <zmumps_c.h>
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <smumps_c.h>
  #else
    #include <dmumps_c.h>
  #endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c cmumps_c
  #else
    #define MUMPS_c zmumps_c
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c smumps_c
  #else
    #define MUMPS_c dmumps_c
  #endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
    #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
    #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;

  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's.
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 87 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 88 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 89 if (set) *set = myset; 90 PetscFunctionReturn(PETSC_SUCCESS); 91 } 92 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 93 94 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 95 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 96 #define PetscMUMPS_c(mumps) \ 97 do { \ 98 if (mumps->use_petsc_omp_support) { \ 99 if (mumps->is_omp_master) { \ 100 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 101 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 102 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 103 PetscCall(PetscFPTrapPop()); \ 104 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 105 } \ 106 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 107 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 108 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 109 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 110 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 111 */ \ 112 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 113 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 114 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 115 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 116 } else { \ 117 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 118 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 119 PetscCall(PetscFPTrapPop()); \ 120 } \ 121 } while (0) 122 #else 123 #define PetscMUMPS_c(mumps) \ 124 do { \ 125 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 126 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 127 PetscCall(PetscFPTrapPop()); \ 128 } while (0) 129 #endif 130 131 /* declare MumpsScalar */ 132 #if defined(PETSC_USE_COMPLEX) 133 #if defined(PETSC_USE_REAL_SINGLE) 134 #define MumpsScalar mumps_complex 135 #else 136 #define MumpsScalar mumps_double_complex 137 #endif 138 #else 139 #define MumpsScalar PetscScalar 140 #endif 141 142 /* macros s.t. indices match MUMPS documentation */ 143 #define ICNTL(I) icntl[(I)-1] 144 #define CNTL(I) cntl[(I)-1] 145 #define INFOG(I) infog[(I)-1] 146 #define INFO(I) info[(I)-1] 147 #define RINFOG(I) rinfog[(I)-1] 148 #define RINFO(I) rinfo[(I)-1] 149 150 typedef struct Mat_MUMPS Mat_MUMPS; 151 struct Mat_MUMPS { 152 #if defined(PETSC_USE_COMPLEX) 153 #if defined(PETSC_USE_REAL_SINGLE) 154 CMUMPS_STRUC_C id; 155 #else 156 ZMUMPS_STRUC_C id; 157 #endif 158 #else 159 #if defined(PETSC_USE_REAL_SINGLE) 160 SMUMPS_STRUC_C id; 161 #else 162 DMUMPS_STRUC_C id; 163 #endif 164 #endif 165 166 MatStructure matstruc; 167 PetscMPIInt myid, petsc_size; 168 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. 
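                                      The indices stored here are 1-based (the Fortran shift is applied during conversion) and are attached to the MUMPS id structure as irn/jcn or irn_loc/jcn_loc, depending on whether the assembled matrix is provided centralized or distributed (ICNTL(18)).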
*/ 169 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 170 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 171 PetscMUMPSInt sym; 172 MPI_Comm mumps_comm; 173 PetscMUMPSInt *ICNTL_pre; 174 PetscReal *CNTL_pre; 175 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 176 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 177 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 178 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 179 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 180 PetscInt *rhs_nrow, max_nrhs; 181 PetscMPIInt *rhs_recvcounts, *rhs_disps; 182 PetscScalar *rhs_loc, *rhs_recvbuf; 183 #endif 184 Vec b_seq, x_seq; 185 PetscInt ninfo, *info; /* which INFO to display */ 186 PetscInt sizeredrhs; 187 PetscScalar *schur_sol; 188 PetscInt schur_sizesol; 189 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 190 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 191 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 192 193 /* Support for MATNEST */ 194 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 195 PetscInt64 *nest_vals_start; 196 PetscScalar *nest_vals; 197 198 /* stuff used by petsc/mumps OpenMP support*/ 199 PetscBool use_petsc_omp_support; 200 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 201 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 202 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 203 PetscMPIInt tag, omp_comm_size; 204 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 205 MPI_Request *reqs; 206 }; 207 208 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 209 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 210 */ 211 static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 212 { 213 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
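                                 (Illustration with made-up numbers: a 2 x 2 dense block stored in 1-based CSR has ia = {1, 3, 5} and ja = {1, 2, 1, 2}, so nnz = ia[2] - 1 = 4.)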
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 214 215 PetscFunctionBegin; 216 #if defined(PETSC_USE_64BIT_INDICES) 217 { 218 PetscInt i; 219 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 220 PetscCall(PetscFree(mumps->ia_alloc)); 221 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 222 mumps->cur_ilen = nrow + 1; 223 } 224 if (nnz > mumps->cur_jlen) { 225 PetscCall(PetscFree(mumps->ja_alloc)); 226 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 227 mumps->cur_jlen = nnz; 228 } 229 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 230 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 231 *ia_mumps = mumps->ia_alloc; 232 *ja_mumps = mumps->ja_alloc; 233 } 234 #else 235 *ia_mumps = ia; 236 *ja_mumps = ja; 237 #endif 238 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 243 { 244 PetscFunctionBegin; 245 PetscCall(PetscFree(mumps->id.listvar_schur)); 246 PetscCall(PetscFree(mumps->id.redrhs)); 247 PetscCall(PetscFree(mumps->schur_sol)); 248 mumps->id.size_schur = 0; 249 mumps->id.schur_lld = 0; 250 mumps->id.ICNTL(19) = 0; 251 PetscFunctionReturn(PETSC_SUCCESS); 252 } 253 254 /* solve with rhs in mumps->id.redrhs and return in the same location */ 255 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 256 { 257 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 258 Mat S, B, X; 259 MatFactorSchurStatus schurstatus; 260 PetscInt sizesol; 261 262 PetscFunctionBegin; 263 PetscCall(MatFactorFactorizeSchurComplement(F)); 264 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 265 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 266 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 267 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 268 PetscCall(MatBindToCPU(B, S->boundtocpu)); 269 #endif 270 switch (schurstatus) { 271 case MAT_FACTOR_SCHUR_FACTORED: 272 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 273 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 274 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 275 PetscCall(MatBindToCPU(X, S->boundtocpu)); 276 #endif 277 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 278 PetscCall(MatMatSolveTranspose(S, B, X)); 279 } else { 280 PetscCall(MatMatSolve(S, B, X)); 281 } 282 break; 283 case MAT_FACTOR_SCHUR_INVERTED: 284 sizesol = mumps->id.nrhs * mumps->id.size_schur; 285 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 286 PetscCall(PetscFree(mumps->schur_sol)); 287 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 288 mumps->schur_sizesol = sizesol; 289 } 290 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 291 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 292 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 293 PetscCall(MatBindToCPU(X, S->boundtocpu)); 294 #endif 295 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 296 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 297 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 298 } else { 299 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 300 } 301 PetscCall(MatProductSetFromOptions(X)); 302 PetscCall(MatProductSymbolic(X)); 303 PetscCall(MatProductNumeric(X)); 304 305 
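      /* With MAT_FACTOR_SCHUR_INVERTED, S holds the explicit inverse of the Schur complement, so the product above
         computes X = S*B (or X = S^T*B for a transpose solve), i.e. the solution of the reduced system; it is copied
         back into B below, which wraps id.redrhs where MUMPS expects the reduced solution. */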
PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 306 break; 307 default: 308 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 309 } 310 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 311 PetscCall(MatDestroy(&B)); 312 PetscCall(MatDestroy(&X)); 313 PetscFunctionReturn(PETSC_SUCCESS); 314 } 315 316 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 317 { 318 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 319 320 PetscFunctionBegin; 321 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 if (!expansion) { /* prepare for the condensation step */ 325 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 326 /* allocate MUMPS internal array to store reduced right-hand sides */ 327 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 328 PetscCall(PetscFree(mumps->id.redrhs)); 329 mumps->id.lredrhs = mumps->id.size_schur; 330 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 331 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 332 } 333 } else { /* prepare for the expansion step */ 334 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 335 PetscCall(MatMumpsSolveSchur_Private(F)); 336 mumps->id.ICNTL(26) = 2; /* expansion phase */ 337 PetscMUMPS_c(mumps); 338 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 339 /* restore defaults */ 340 mumps->id.ICNTL(26) = -1; 341 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 342 if (mumps->id.nrhs > 1) { 343 PetscCall(PetscFree(mumps->id.redrhs)); 344 mumps->id.lredrhs = 0; 345 mumps->sizeredrhs = 0; 346 } 347 } 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 /* 352 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 353 354 input: 355 A - matrix in aij,baij or sbaij format 356 shift - 0: C style output triple; 1: Fortran style output triple. 357 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 358 MAT_REUSE_MATRIX: only the values in v array are updated 359 output: 360 nnz - dim of r, c, and v (number of local nonzero entries of A) 361 r, c, v - row and col index, matrix values (matrix triples) 362 363 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 364 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 365 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
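     An illustrative (made-up) example: for the 2 x 2 AIJ matrix with entries a11 = 4, a12 = 1, a22 = 3 and a
     Fortran-style shift of 1, the conversion produces nnz = 3, r = {1, 1, 2}, c = {1, 2, 2}, v = {4, 1, 3}.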
366 367 */ 368 369 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 370 { 371 const PetscScalar *av; 372 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 373 PetscInt64 nz, rnz, i, j, k; 374 PetscMUMPSInt *row, *col; 375 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 376 377 PetscFunctionBegin; 378 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 379 if (reuse == MAT_INITIAL_MATRIX) { 380 nz = aa->nz; 381 ai = aa->i; 382 aj = aa->j; 383 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 384 for (i = k = 0; i < M; i++) { 385 rnz = ai[i + 1] - ai[i]; 386 ajj = aj + ai[i]; 387 for (j = 0; j < rnz; j++) { 388 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 389 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 390 k++; 391 } 392 } 393 mumps->val = (PetscScalar *)av; 394 mumps->irn = row; 395 mumps->jcn = col; 396 mumps->nnz = nz; 397 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */ 398 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 399 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 404 { 405 PetscInt64 nz, i, j, k, r; 406 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 407 PetscMUMPSInt *row, *col; 408 409 PetscFunctionBegin; 410 nz = a->sliidx[a->totalslices]; 411 if (reuse == MAT_INITIAL_MATRIX) { 412 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 413 for (i = k = 0; i < a->totalslices; i++) { 414 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 415 } 416 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 417 mumps->irn = row; 418 mumps->jcn = col; 419 mumps->nnz = nz; 420 mumps->val = a->val; 421 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */ 422 else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 427 { 428 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 429 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 430 PetscInt64 M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 431 PetscInt bs; 432 PetscMUMPSInt *row, *col; 433 434 PetscFunctionBegin; 435 if (reuse == MAT_INITIAL_MATRIX) { 436 PetscCall(MatGetBlockSize(A, &bs)); 437 M = A->rmap->N / bs; 438 ai = aa->i; 439 aj = aa->j; 440 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 441 for (i = 0; i < M; i++) { 442 ajj = aj + ai[i]; 443 rnz = ai[i + 1] - ai[i]; 444 for (k = 0; k < rnz; k++) { 445 for (j = 0; j < bs; j++) { 446 for (m = 0; m < bs; m++) { 447 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 448 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 449 idx++; 450 } 451 } 452 } 453 } 454 mumps->irn = row; 455 mumps->jcn = col; 456 mumps->nnz = nz; 457 mumps->val = aa->a; 458 } else if (mumps->nest_vals) 
PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 459 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 460 PetscFunctionReturn(PETSC_SUCCESS); 461 } 462 463 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 464 { 465 const PetscInt *ai, *aj, *ajj; 466 PetscInt bs; 467 PetscInt64 nz, rnz, i, j, k, m; 468 PetscMUMPSInt *row, *col; 469 PetscScalar *val; 470 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 471 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 472 #if defined(PETSC_USE_COMPLEX) 473 PetscBool isset, hermitian; 474 #endif 475 476 PetscFunctionBegin; 477 #if defined(PETSC_USE_COMPLEX) 478 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 479 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 480 #endif 481 ai = aa->i; 482 aj = aa->j; 483 PetscCall(MatGetBlockSize(A, &bs)); 484 if (reuse == MAT_INITIAL_MATRIX) { 485 const PetscInt64 alloc_size = aa->nz * bs2; 486 487 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 488 if (bs > 1) { 489 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 490 mumps->val = mumps->val_alloc; 491 } else { 492 mumps->val = aa->a; 493 } 494 mumps->irn = row; 495 mumps->jcn = col; 496 } else { 497 row = mumps->irn; 498 col = mumps->jcn; 499 } 500 val = mumps->val; 501 502 nz = 0; 503 if (bs > 1) { 504 for (i = 0; i < mbs; i++) { 505 rnz = ai[i + 1] - ai[i]; 506 ajj = aj + ai[i]; 507 for (j = 0; j < rnz; j++) { 508 for (k = 0; k < bs; k++) { 509 for (m = 0; m < bs; m++) { 510 if (ajj[j] > i || k >= m) { 511 if (reuse == MAT_INITIAL_MATRIX) { 512 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 513 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 514 } 515 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 516 } 517 } 518 } 519 } 520 } 521 } else if (reuse == MAT_INITIAL_MATRIX) { 522 for (i = 0; i < mbs; i++) { 523 rnz = ai[i + 1] - ai[i]; 524 ajj = aj + ai[i]; 525 for (j = 0; j < rnz; j++) { 526 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 527 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 528 nz++; 529 } 530 } 531 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 532 } else if (mumps->nest_vals) 533 PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 534 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 535 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 536 PetscFunctionReturn(PETSC_SUCCESS); 537 } 538 539 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 540 { 541 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 542 PetscInt64 nz, rnz, i, j; 543 const PetscScalar *av, *v1; 544 PetscScalar *val; 545 PetscMUMPSInt *row, *col; 546 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 547 PetscBool missing; 548 #if defined(PETSC_USE_COMPLEX) 549 PetscBool hermitian, isset; 550 #endif 551 552 PetscFunctionBegin; 553 #if 
defined(PETSC_USE_COMPLEX) 554 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 555 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 556 #endif 557 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 558 ai = aa->i; 559 aj = aa->j; 560 adiag = aa->diag; 561 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 562 if (reuse == MAT_INITIAL_MATRIX) { 563 /* count nz in the upper triangular part of A */ 564 nz = 0; 565 if (missing) { 566 for (i = 0; i < M; i++) { 567 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 568 for (j = ai[i]; j < ai[i + 1]; j++) { 569 if (aj[j] < i) continue; 570 nz++; 571 } 572 } else { 573 nz += ai[i + 1] - adiag[i]; 574 } 575 } 576 } else { 577 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 578 } 579 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 580 PetscCall(PetscMalloc1(nz, &val)); 581 mumps->nnz = nz; 582 mumps->irn = row; 583 mumps->jcn = col; 584 mumps->val = mumps->val_alloc = val; 585 586 nz = 0; 587 if (missing) { 588 for (i = 0; i < M; i++) { 589 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 590 for (j = ai[i]; j < ai[i + 1]; j++) { 591 if (aj[j] < i) continue; 592 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 593 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 594 val[nz] = av[j]; 595 nz++; 596 } 597 } else { 598 rnz = ai[i + 1] - adiag[i]; 599 ajj = aj + adiag[i]; 600 v1 = av + adiag[i]; 601 for (j = 0; j < rnz; j++) { 602 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 603 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 604 val[nz++] = v1[j]; 605 } 606 } 607 } 608 } else { 609 for (i = 0; i < M; i++) { 610 rnz = ai[i + 1] - adiag[i]; 611 ajj = aj + adiag[i]; 612 v1 = av + adiag[i]; 613 for (j = 0; j < rnz; j++) { 614 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 615 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 616 val[nz++] = v1[j]; 617 } 618 } 619 } 620 } else { 621 nz = 0; 622 val = mumps->val; 623 if (missing) { 624 for (i = 0; i < M; i++) { 625 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 626 for (j = ai[i]; j < ai[i + 1]; j++) { 627 if (aj[j] < i) continue; 628 val[nz++] = av[j]; 629 } 630 } else { 631 rnz = ai[i + 1] - adiag[i]; 632 v1 = av + adiag[i]; 633 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 634 } 635 } 636 } else { 637 for (i = 0; i < M; i++) { 638 rnz = ai[i + 1] - adiag[i]; 639 v1 = av + adiag[i]; 640 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 641 } 642 } 643 } 644 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 645 PetscFunctionReturn(PETSC_SUCCESS); 646 } 647 648 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 649 { 650 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 651 PetscInt bs; 652 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 653 PetscMUMPSInt *row, *col; 654 const PetscScalar *av, *bv, *v1, *v2; 655 PetscScalar *val; 656 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 657 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 658 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 659 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 660 #if defined(PETSC_USE_COMPLEX) 661 PetscBool hermitian, isset; 662 #endif 663 664 PetscFunctionBegin; 665 #if defined(PETSC_USE_COMPLEX) 666 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 667 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 668 #endif 669 
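  /* SBAIJ stores only the upper triangle: within each diagonal block the loops below keep only entries with
     k >= m (on or above the block diagonal), so the exact number of triples is known only after the scan and
     mumps->nnz is therefore set at the very end. */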
PetscCall(MatGetBlockSize(A, &bs)); 670 rstart = A->rmap->rstart; 671 ai = aa->i; 672 aj = aa->j; 673 bi = bb->i; 674 bj = bb->j; 675 av = aa->a; 676 bv = bb->a; 677 678 garray = mat->garray; 679 680 if (reuse == MAT_INITIAL_MATRIX) { 681 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 682 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 683 PetscCall(PetscMalloc1(nz, &val)); 684 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 685 mumps->irn = row; 686 mumps->jcn = col; 687 mumps->val = mumps->val_alloc = val; 688 } else { 689 val = mumps->val; 690 } 691 692 jj = 0; 693 irow = rstart; 694 for (i = 0; i < mbs; i++) { 695 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 696 countA = ai[i + 1] - ai[i]; 697 countB = bi[i + 1] - bi[i]; 698 bjj = bj + bi[i]; 699 v1 = av + ai[i] * bs2; 700 v2 = bv + bi[i] * bs2; 701 702 if (bs > 1) { 703 /* A-part */ 704 for (j = 0; j < countA; j++) { 705 for (k = 0; k < bs; k++) { 706 for (m = 0; m < bs; m++) { 707 if (rstart + ajj[j] * bs > irow || k >= m) { 708 if (reuse == MAT_INITIAL_MATRIX) { 709 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 710 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 711 } 712 val[jj++] = v1[j * bs2 + m + k * bs]; 713 } 714 } 715 } 716 } 717 718 /* B-part */ 719 for (j = 0; j < countB; j++) { 720 for (k = 0; k < bs; k++) { 721 for (m = 0; m < bs; m++) { 722 if (reuse == MAT_INITIAL_MATRIX) { 723 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 724 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 725 } 726 val[jj++] = v2[j * bs2 + m + k * bs]; 727 } 728 } 729 } 730 } else { 731 /* A-part */ 732 for (j = 0; j < countA; j++) { 733 if (reuse == MAT_INITIAL_MATRIX) { 734 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 735 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 736 } 737 val[jj++] = v1[j]; 738 } 739 740 /* B-part */ 741 for (j = 0; j < countB; j++) { 742 if (reuse == MAT_INITIAL_MATRIX) { 743 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 744 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 745 } 746 val[jj++] = v2[j]; 747 } 748 } 749 irow += bs; 750 } 751 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 752 PetscFunctionReturn(PETSC_SUCCESS); 753 } 754 755 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 756 { 757 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 758 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 759 PetscMUMPSInt *row, *col; 760 const PetscScalar *av, *bv, *v1, *v2; 761 PetscScalar *val; 762 Mat Ad, Ao; 763 Mat_SeqAIJ *aa; 764 Mat_SeqAIJ *bb; 765 766 PetscFunctionBegin; 767 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 768 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 769 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 770 771 aa = (Mat_SeqAIJ *)(Ad)->data; 772 bb = (Mat_SeqAIJ *)(Ao)->data; 773 ai = aa->i; 774 aj = aa->j; 775 bi = bb->i; 776 bj = bb->j; 777 778 rstart = A->rmap->rstart; 779 cstart = A->cmap->rstart; 780 781 if (reuse == MAT_INITIAL_MATRIX) { 782 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 783 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 784 PetscCall(PetscMalloc1(nz, &val)); 785 mumps->nnz = nz; 786 mumps->irn = row; 787 mumps->jcn = col; 788 mumps->val = mumps->val_alloc = val; 789 } else { 790 val = mumps->val; 791 } 792 793 jj = 0; 794 irow = rstart; 795 for (i = 0; i < m; i++) { 796 ajj = aj + 
ai[i]; /* ptr to the beginning of this row */ 797 countA = ai[i + 1] - ai[i]; 798 countB = bi[i + 1] - bi[i]; 799 bjj = bj + bi[i]; 800 v1 = av + ai[i]; 801 v2 = bv + bi[i]; 802 803 /* A-part */ 804 for (j = 0; j < countA; j++) { 805 if (reuse == MAT_INITIAL_MATRIX) { 806 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 807 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 808 } 809 val[jj++] = v1[j]; 810 } 811 812 /* B-part */ 813 for (j = 0; j < countB; j++) { 814 if (reuse == MAT_INITIAL_MATRIX) { 815 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 816 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 817 } 818 val[jj++] = v2[j]; 819 } 820 irow++; 821 } 822 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 823 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 824 PetscFunctionReturn(PETSC_SUCCESS); 825 } 826 827 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 828 { 829 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 830 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 831 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 832 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 833 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 834 const PetscInt bs2 = mat->bs2; 835 PetscInt bs; 836 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 837 PetscMUMPSInt *row, *col; 838 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 839 PetscScalar *val; 840 841 PetscFunctionBegin; 842 PetscCall(MatGetBlockSize(A, &bs)); 843 if (reuse == MAT_INITIAL_MATRIX) { 844 nz = bs2 * (aa->nz + bb->nz); 845 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 846 PetscCall(PetscMalloc1(nz, &val)); 847 mumps->nnz = nz; 848 mumps->irn = row; 849 mumps->jcn = col; 850 mumps->val = mumps->val_alloc = val; 851 } else { 852 val = mumps->val; 853 } 854 855 jj = 0; 856 irow = rstart; 857 for (i = 0; i < mbs; i++) { 858 countA = ai[i + 1] - ai[i]; 859 countB = bi[i + 1] - bi[i]; 860 ajj = aj + ai[i]; 861 bjj = bj + bi[i]; 862 v1 = av + bs2 * ai[i]; 863 v2 = bv + bs2 * bi[i]; 864 865 idx = 0; 866 /* A-part */ 867 for (k = 0; k < countA; k++) { 868 for (j = 0; j < bs; j++) { 869 for (n = 0; n < bs; n++) { 870 if (reuse == MAT_INITIAL_MATRIX) { 871 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 872 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 873 } 874 val[jj++] = v1[idx++]; 875 } 876 } 877 } 878 879 idx = 0; 880 /* B-part */ 881 for (k = 0; k < countB; k++) { 882 for (j = 0; j < bs; j++) { 883 for (n = 0; n < bs; n++) { 884 if (reuse == MAT_INITIAL_MATRIX) { 885 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 886 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 887 } 888 val[jj++] = v2[idx++]; 889 } 890 } 891 } 892 irow += bs; 893 } 894 PetscFunctionReturn(PETSC_SUCCESS); 895 } 896 897 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 898 { 899 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 900 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 901 PetscMUMPSInt *row, *col; 902 const PetscScalar *av, *bv, *v1, *v2; 903 PetscScalar *val; 904 Mat Ad, Ao; 905 Mat_SeqAIJ *aa; 906 Mat_SeqAIJ *bb; 907 #if defined(PETSC_USE_COMPLEX) 908 PetscBool hermitian, isset; 909 #endif 910 911 PetscFunctionBegin; 912 #if defined(PETSC_USE_COMPLEX) 913 PetscCall(MatIsHermitianKnown(A, 
&isset, &hermitian)); 914 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 915 #endif 916 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 917 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 918 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 919 920 aa = (Mat_SeqAIJ *)(Ad)->data; 921 bb = (Mat_SeqAIJ *)(Ao)->data; 922 ai = aa->i; 923 aj = aa->j; 924 adiag = aa->diag; 925 bi = bb->i; 926 bj = bb->j; 927 928 rstart = A->rmap->rstart; 929 930 if (reuse == MAT_INITIAL_MATRIX) { 931 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 932 nzb = 0; /* num of upper triangular entries in mat->B */ 933 for (i = 0; i < m; i++) { 934 nza += (ai[i + 1] - adiag[i]); 935 countB = bi[i + 1] - bi[i]; 936 bjj = bj + bi[i]; 937 for (j = 0; j < countB; j++) { 938 if (garray[bjj[j]] > rstart) nzb++; 939 } 940 } 941 942 nz = nza + nzb; /* total nz of upper triangular part of mat */ 943 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 944 PetscCall(PetscMalloc1(nz, &val)); 945 mumps->nnz = nz; 946 mumps->irn = row; 947 mumps->jcn = col; 948 mumps->val = mumps->val_alloc = val; 949 } else { 950 val = mumps->val; 951 } 952 953 jj = 0; 954 irow = rstart; 955 for (i = 0; i < m; i++) { 956 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 957 v1 = av + adiag[i]; 958 countA = ai[i + 1] - adiag[i]; 959 countB = bi[i + 1] - bi[i]; 960 bjj = bj + bi[i]; 961 v2 = bv + bi[i]; 962 963 /* A-part */ 964 for (j = 0; j < countA; j++) { 965 if (reuse == MAT_INITIAL_MATRIX) { 966 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 967 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 968 } 969 val[jj++] = v1[j]; 970 } 971 972 /* B-part */ 973 for (j = 0; j < countB; j++) { 974 if (garray[bjj[j]] > rstart) { 975 if (reuse == MAT_INITIAL_MATRIX) { 976 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 977 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 978 } 979 val[jj++] = v2[j]; 980 } 981 } 982 irow++; 983 } 984 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 985 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 986 PetscFunctionReturn(PETSC_SUCCESS); 987 } 988 989 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 990 { 991 const PetscScalar *av; 992 const PetscInt M = A->rmap->n; 993 PetscInt64 i; 994 PetscMUMPSInt *row, *col; 995 Vec v; 996 997 PetscFunctionBegin; 998 PetscCall(MatDiagonalGetDiagonal(A, &v)); 999 PetscCall(VecGetArrayRead(v, &av)); 1000 if (reuse == MAT_INITIAL_MATRIX) { 1001 PetscCall(PetscMalloc2(M, &row, M, &col)); 1002 for (i = 0; i < M; i++) { 1003 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1004 col[i] = row[i]; 1005 } 1006 mumps->val = (PetscScalar *)av; 1007 mumps->irn = row; 1008 mumps->jcn = col; 1009 mumps->nnz = M; 1010 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1011 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1012 PetscCall(VecRestoreArrayRead(v, &av)); 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1017 { 1018 PetscScalar *v; 1019 const 
PetscInt m = A->rmap->n, N = A->cmap->N; 1020 PetscInt lda; 1021 PetscInt64 i, j; 1022 PetscMUMPSInt *row, *col; 1023 1024 PetscFunctionBegin; 1025 PetscCall(MatDenseGetArray(A, &v)); 1026 PetscCall(MatDenseGetLDA(A, &lda)); 1027 if (reuse == MAT_INITIAL_MATRIX) { 1028 PetscCall(PetscMalloc2(m * N, &row, m * N, &col)); 1029 for (i = 0; i < m; i++) { 1030 col[i] = 0; 1031 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1032 } 1033 for (j = 1; j < N; j++) { 1034 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j)); 1035 PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m)); 1036 } 1037 if (lda == m) mumps->val = v; 1038 else { 1039 PetscCall(PetscMalloc1(m * N, &mumps->val)); 1040 mumps->val_alloc = mumps->val; 1041 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1042 } 1043 mumps->irn = row; 1044 mumps->jcn = col; 1045 mumps->nnz = m * N; 1046 } else { 1047 if (lda == m && !mumps->nest_vals) mumps->val = v; 1048 else { 1049 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1050 } 1051 } 1052 PetscCall(MatDenseRestoreArray(A, &v)); 1053 PetscFunctionReturn(PETSC_SUCCESS); 1054 } 1055 1056 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1057 { 1058 Mat **mats; 1059 PetscInt nr, nc; 1060 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 1061 1062 PetscFunctionBegin; 1063 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1064 if (reuse == MAT_INITIAL_MATRIX) { 1065 PetscMUMPSInt *irns, *jcns; 1066 PetscScalar *vals; 1067 PetscInt64 totnnz, cumnnz, maxnnz; 1068 PetscInt *pjcns_w; 1069 IS *rows, *cols; 1070 PetscInt **rows_idx, **cols_idx; 1071 1072 cumnnz = 0; 1073 maxnnz = 0; 1074 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1075 for (PetscInt r = 0; r < nr; r++) { 1076 for (PetscInt c = 0; c < nc; c++) { 1077 Mat sub = mats[r][c]; 1078 1079 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1080 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1081 if (sub) { 1082 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1083 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag, isDense; 1084 MatInfo info; 1085 1086 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1087 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1088 else { 1089 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1090 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1091 } 1092 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1093 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1094 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1095 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1096 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1097 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1098 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1099 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 1100 1101 if (chol) { 1102 if (r == c) { 1103 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 
1104 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1105 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1106 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1107 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1108 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1109 } else { 1110 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1111 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1112 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1113 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1114 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1115 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1116 } 1117 } else { 1118 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1119 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1120 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1121 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1122 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1123 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1124 } 1125 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1126 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1127 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1128 cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 1129 maxnnz = PetscMax(maxnnz, info.nz_used); 1130 } 1131 } 1132 } 1133 1134 /* Allocate total COO */ 1135 totnnz = cumnnz; 1136 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1137 PetscCall(PetscMalloc1(totnnz, &vals)); 1138 1139 /* Handle rows and column maps 1140 We directly map rows and use an SF for the columns */ 1141 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1142 PetscCall(MatNestGetISs(A, rows, cols)); 1143 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1144 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1145 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1146 else (void)maxnnz; 1147 1148 cumnnz = 0; 1149 for (PetscInt r = 0; r < nr; r++) { 1150 for (PetscInt c = 0; c < nc; c++) { 1151 Mat sub = mats[r][c]; 1152 const PetscInt *ridx = rows_idx[r]; 1153 const PetscInt *cidx = cols_idx[c]; 1154 PetscInt rst; 1155 PetscSF csf; 1156 PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 1157 PetscLayout cmap; 1158 1159 mumps->nest_vals_start[r * nc + c] = cumnnz; 1160 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1161 1162 /* Extract inner blocks if needed */ 1163 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1164 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1165 else { 1166 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1167 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1168 } 1169 swap = (PetscBool)(isTrans || isHTrans); 1170 1171 /* Get column layout to map off-process columns */ 1172 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1173 1174 /* Get row start to map on-process 
rows */ 1175 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1176 1177 /* Directly use the mumps datastructure and use C ordering for now */ 1178 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1179 1180 /* Swap the role of rows and columns indices for transposed blocks 1181 since we need values with global final ordering */ 1182 if (swap) { 1183 cidx = rows_idx[r]; 1184 ridx = cols_idx[c]; 1185 } 1186 1187 /* Communicate column indices 1188 This could have been done with a single SF but it would have complicated the code a lot. 1189 But since we do it only once, we pay the price of setting up an SF for each block */ 1190 if (PetscDefined(USE_64BIT_INDICES)) { 1191 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1192 } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 1193 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1194 PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1195 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1196 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1197 PetscCall(PetscSFDestroy(&csf)); 1198 1199 /* Import indices: use direct map for rows and mapped indices for columns */ 1200 if (swap) { 1201 for (PetscInt k = 0; k < mumps->nnz; k++) { 1202 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1203 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1204 } 1205 } else { 1206 for (PetscInt k = 0; k < mumps->nnz; k++) { 1207 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1208 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1209 } 1210 } 1211 1212 /* Import values to full COO */ 1213 PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1214 if (isHTrans) { /* conjugate the entries */ 1215 PetscScalar *v = vals + cumnnz; 1216 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]); 1217 } 1218 1219 /* Shift new starting point and sanity check */ 1220 cumnnz += mumps->nnz; 1221 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1222 1223 /* Free scratch memory */ 1224 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1225 PetscCall(PetscFree(mumps->val_alloc)); 1226 mumps->val = NULL; 1227 mumps->nnz = 0; 1228 } 1229 } 1230 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1231 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1232 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1233 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1234 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1235 mumps->nest_vals_start[nr * nc] = cumnnz; 1236 1237 /* Set pointers for final MUMPS data structure */ 1238 mumps->nest_vals = vals; 1239 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1240 mumps->val = vals; 1241 mumps->irn = irns; 1242 mumps->jcn = jcns; 1243 mumps->nnz = cumnnz; 1244 } else { 1245 PetscScalar *oval = mumps->nest_vals; 1246 for (PetscInt r = 0; r < nr; r++) { 1247 for (PetscInt c = 0; c < nc; c++) { 1248 
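        /* MAT_REUSE_MATRIX pass: only the numerical values of this block are refreshed, written in place at the
           offset recorded in nest_vals_start[] during the initial conversion */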
PetscBool isTrans, isHTrans = PETSC_FALSE; 1249 Mat sub = mats[r][c]; 1250 PetscInt midx = r * nc + c; 1251 1252 if (!mumps->nest_convert_to_triples[midx]) continue; 1253 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1254 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1255 else { 1256 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1257 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1258 } 1259 mumps->val = oval + mumps->nest_vals_start[midx]; 1260 PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1261 if (isHTrans) { 1262 PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1263 for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1264 } 1265 } 1266 } 1267 mumps->val = oval; 1268 } 1269 PetscFunctionReturn(PETSC_SUCCESS); 1270 } 1271 1272 static PetscErrorCode MatDestroy_MUMPS(Mat A) 1273 { 1274 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1275 1276 PetscFunctionBegin; 1277 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1278 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1279 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1280 PetscCall(VecDestroy(&mumps->b_seq)); 1281 PetscCall(VecDestroy(&mumps->x_seq)); 1282 PetscCall(PetscFree(mumps->id.perm_in)); 1283 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1284 PetscCall(PetscFree(mumps->val_alloc)); 1285 PetscCall(PetscFree(mumps->info)); 1286 PetscCall(PetscFree(mumps->ICNTL_pre)); 1287 PetscCall(PetscFree(mumps->CNTL_pre)); 1288 PetscCall(MatMumpsResetSchur_Private(mumps)); 1289 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1290 mumps->id.job = JOB_END; 1291 PetscMUMPS_c(mumps); 1292 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1293 if (mumps->mumps_comm != MPI_COMM_NULL) { 1294 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1295 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1296 } 1297 } 1298 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1299 if (mumps->use_petsc_omp_support) { 1300 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1301 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1302 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1303 } 1304 #endif 1305 PetscCall(PetscFree(mumps->ia_alloc)); 1306 PetscCall(PetscFree(mumps->ja_alloc)); 1307 PetscCall(PetscFree(mumps->recvcount)); 1308 PetscCall(PetscFree(mumps->reqs)); 1309 PetscCall(PetscFree(mumps->irhs_loc)); 1310 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1311 PetscCall(PetscFree(mumps->nest_vals)); 1312 PetscCall(PetscFree(A->data)); 1313 1314 /* clear composed functions */ 1315 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1316 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1317 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1318 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1319 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1320 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", 
NULL)); 1321 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1322 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1323 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1324 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1325 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1326 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1327 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1328 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1329 PetscFunctionReturn(PETSC_SUCCESS); 1330 } 1331 1332 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1333 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1334 { 1335 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1336 const PetscMPIInt ompsize = mumps->omp_comm_size; 1337 PetscInt i, m, M, rstart; 1338 1339 PetscFunctionBegin; 1340 PetscCall(MatGetSize(A, &M, NULL)); 1341 PetscCall(MatGetLocalSize(A, &m, NULL)); 1342 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1343 if (ompsize == 1) { 1344 if (!mumps->irhs_loc) { 1345 mumps->nloc_rhs = m; 1346 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1347 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1348 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1349 } 1350 mumps->id.rhs_loc = (MumpsScalar *)array; 1351 } else { 1352 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1353 const PetscInt *ranges; 1354 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1355 MPI_Group petsc_group, omp_group; 1356 PetscScalar *recvbuf = NULL; 1357 1358 if (mumps->is_omp_master) { 1359 /* Lazily initialize the omp stuff for distributed rhs */ 1360 if (!mumps->irhs_loc) { 1361 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1362 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1363 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1364 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1365 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1366 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1367 1368 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1369 mumps->nloc_rhs = 0; 1370 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1371 for (j = 0; j < ompsize; j++) { 1372 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1373 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1374 } 1375 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1376 for (j = k = 0; j < ompsize; j++) { 1377 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1378 } 1379 1380 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1381 PetscCallMPI(MPI_Group_free(&petsc_group)); 1382 PetscCallMPI(MPI_Group_free(&omp_group)); 1383 } 1384 1385 /* Realloc buffers when current nrhs is bigger than what we have met */ 1386 if (nrhs > mumps->max_nrhs) { 1387 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1388 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, 
&mumps->rhs_recvbuf)); 1389 mumps->max_nrhs = nrhs; 1390 } 1391 1392 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1393 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1394 mumps->rhs_disps[0] = 0; 1395 for (j = 1; j < ompsize; j++) { 1396 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1397 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1398 } 1399 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1400 } 1401 1402 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1403 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1404 1405 if (mumps->is_omp_master) { 1406 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1407 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1408 for (j = 0; j < ompsize; j++) { 1409 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1410 dst = dstbase; 1411 for (i = 0; i < nrhs; i++) { 1412 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1413 src += mumps->rhs_nrow[j]; 1414 dst += mumps->nloc_rhs; 1415 } 1416 dstbase += mumps->rhs_nrow[j]; 1417 } 1418 } 1419 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1420 } 1421 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1422 } 1423 mumps->id.nrhs = nrhs; 1424 mumps->id.nloc_rhs = mumps->nloc_rhs; 1425 mumps->id.lrhs_loc = mumps->nloc_rhs; 1426 mumps->id.irhs_loc = mumps->irhs_loc; 1427 PetscFunctionReturn(PETSC_SUCCESS); 1428 } 1429 1430 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1431 { 1432 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1433 const PetscScalar *rarray = NULL; 1434 PetscScalar *array; 1435 IS is_iden, is_petsc; 1436 PetscInt i; 1437 PetscBool second_solve = PETSC_FALSE; 1438 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1439 1440 PetscFunctionBegin; 1441 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1442 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1443 &cite1)); 1444 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1445 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1446 &cite2)); 1447 1448 if (A->factorerrortype) { 1449 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1450 PetscCall(VecSetInf(x)); 1451 PetscFunctionReturn(PETSC_SUCCESS); 1452 } 1453 1454 mumps->id.nrhs = 1; 1455 if (mumps->petsc_size > 1) { 1456 if (mumps->ICNTL20 == 10) { 1457 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1458 PetscCall(VecGetArrayRead(b, &rarray)); 1459 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1460 } else { 1461 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1462 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1463 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1464 if (!mumps->myid) { 1465 PetscCall(VecGetArray(mumps->b_seq, &array)); 1466 mumps->id.rhs = (MumpsScalar *)array; 1467 } 1468 } 1469 } else { /* petsc_size == 1 */ 1470 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1471 PetscCall(VecCopy(b, x)); 1472 PetscCall(VecGetArray(x, &array)); 1473 mumps->id.rhs = (MumpsScalar *)array; 1474 } 1475 1476 /* 1477 handle condensation step of Schur complement (if any) 1478 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1479 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1480 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1481 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1482 */ 1483 if (mumps->id.size_schur > 0) { 1484 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1485 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1486 second_solve = PETSC_TRUE; 1487 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1488 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1489 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1490 } 1491 /* solve phase */ 1492 mumps->id.job = JOB_SOLVE; 1493 PetscMUMPS_c(mumps); 1494 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1495 1496 /* handle expansion step of Schur complement (if any) */ 1497 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1498 else if (mumps->id.ICNTL(26) == 1) { 1499 PetscCall(MatMumpsSolveSchur_Private(A)); 1500 for (i = 0; i < mumps->id.size_schur; ++i) { 1501 #if !defined(PETSC_USE_COMPLEX) 1502 PetscScalar val = mumps->id.redrhs[i]; 1503 #else 1504 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1505 #endif 1506 array[mumps->id.listvar_schur[i] - 1] = val; 1507 } 1508 } 1509 1510 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1511 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1512 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1513 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1514 } 1515 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1516 PetscInt *isol2_loc 
= NULL; 1517 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1518 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1519 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1520 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1521 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1522 PetscCall(ISDestroy(&is_iden)); 1523 PetscCall(ISDestroy(&is_petsc)); 1524 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1525 } 1526 1527 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1528 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1529 } 1530 1531 if (mumps->petsc_size > 1) { 1532 if (mumps->ICNTL20 == 10) { 1533 PetscCall(VecRestoreArrayRead(b, &rarray)); 1534 } else if (!mumps->myid) { 1535 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1536 } 1537 } else PetscCall(VecRestoreArray(x, &array)); 1538 1539 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1540 PetscFunctionReturn(PETSC_SUCCESS); 1541 } 1542 1543 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1544 { 1545 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1546 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1547 1548 PetscFunctionBegin; 1549 mumps->id.ICNTL(9) = 0; 1550 PetscCall(MatSolve_MUMPS(A, b, x)); 1551 mumps->id.ICNTL(9) = value; 1552 PetscFunctionReturn(PETSC_SUCCESS); 1553 } 1554 1555 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1556 { 1557 Mat Bt = NULL; 1558 PetscBool denseX, denseB, flg, flgT; 1559 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1560 PetscInt i, nrhs, M; 1561 PetscScalar *array; 1562 const PetscScalar *rbray; 1563 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1564 PetscMUMPSInt *isol_loc, *isol_loc_save; 1565 PetscScalar *bray, *sol_loc, *sol_loc_save; 1566 IS is_to, is_from; 1567 PetscInt k, proc, j, m, myrstart; 1568 const PetscInt *rstart; 1569 Vec v_mpi, msol_loc; 1570 VecScatter scat_sol; 1571 Vec b_seq; 1572 VecScatter scat_rhs; 1573 PetscScalar *aa; 1574 PetscInt spnr, *ia, *ja; 1575 Mat_MPIAIJ *b = NULL; 1576 1577 PetscFunctionBegin; 1578 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1579 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1580 1581 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1582 if (denseB) { 1583 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1584 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1585 } else { /* sparse B */ 1586 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1587 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1588 if (flgT) { /* input B is transpose of actual RHS matrix, 1589 because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 1590 PetscCall(MatTransposeGetMat(B, &Bt)); 1591 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1592 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1593 } 1594 1595 PetscCall(MatGetSize(B, &M, &nrhs)); 1596 mumps->id.nrhs = nrhs; 1597 mumps->id.lrhs = M; 1598 mumps->id.rhs = NULL; 1599 1600 if (mumps->petsc_size == 1) { 1601 PetscScalar *aa; 1602 PetscInt spnr, *ia, *ja; 1603 PetscBool second_solve = PETSC_FALSE; 1604 1605 PetscCall(MatDenseGetArray(X, &array)); 1606 mumps->id.rhs = (MumpsScalar *)array; 1607 1608 if (denseB) { 1609 /* copy B to X */ 1610 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1611 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1612 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1613 } else { /* sparse B */ 1614 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1615 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1616 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1617 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1618 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1619 } 1620 /* handle condensation step of Schur complement (if any) */ 1621 if (mumps->id.size_schur > 0) { 1622 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1623 second_solve = PETSC_TRUE; 1624 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1625 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1626 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1627 } 1628 /* solve phase */ 1629 mumps->id.job = JOB_SOLVE; 1630 PetscMUMPS_c(mumps); 1631 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1632 1633 /* handle expansion step of Schur complement (if any) */ 1634 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1635 else if (mumps->id.ICNTL(26) == 1) { 1636 PetscCall(MatMumpsSolveSchur_Private(A)); 1637 for (j = 0; j < nrhs; ++j) 1638 for (i = 0; i < mumps->id.size_schur; ++i) { 1639 #if !defined(PETSC_USE_COMPLEX) 1640 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1641 #else 1642 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1643 #endif 1644 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1645 } 1646 } 1647 if (!denseB) { /* sparse B */ 1648 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1649 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1650 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1651 } 1652 PetscCall(MatDenseRestoreArray(X, &array)); 1653 PetscFunctionReturn(PETSC_SUCCESS); 1654 } 1655 1656 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
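     The code below proceeds in three steps:
     (1) allocate sol_loc/isol_loc large enough for the nrhs local solutions (nrhs*lsol_loc entries), saving the buffers used by MatSolve_MUMPS() so they can be restored afterwards;
     (2) hand MUMPS the right-hand sides, either as a distributed dense RHS (ICNTL(20)=10), as a dense RHS gathered onto rank 0 (ICNTL(20)=0), or as a centralized sparse RHS on the host;
     (3) run the solve phase and scatter the distributed solution back into the layout of X.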
*/ 1657 PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1658 1659 /* create msol_loc to hold mumps local solution */ 1660 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1661 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1662 1663 lsol_loc = mumps->id.lsol_loc; 1664 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1665 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1666 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1667 mumps->id.isol_loc = isol_loc; 1668 1669 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1670 1671 if (denseB) { 1672 if (mumps->ICNTL20 == 10) { 1673 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1674 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1675 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1676 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1677 PetscCall(MatGetLocalSize(B, &m, NULL)); 1678 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1679 } else { 1680 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1681 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1682 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1683 0, re-arrange B into desired order, which is a local operation. 1684 */ 1685 1686 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1687 /* wrap dense rhs matrix B into a vector v_mpi */ 1688 PetscCall(MatGetLocalSize(B, &m, NULL)); 1689 PetscCall(MatDenseGetArray(B, &bray)); 1690 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1691 PetscCall(MatDenseRestoreArray(B, &bray)); 1692 1693 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
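     On the host MUMPS reads the centralized right-hand sides from id.rhs stored column by column with leading dimension lrhs = M, i.e. entry (i,j) of B sits at offset j*M + i; the index array idx[] built below scatters the distributed entries of v_mpi into exactly that ordering in b_seq.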
*/ 1694 if (!mumps->myid) { 1695 PetscInt *idx; 1696 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1697 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1698 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1699 k = 0; 1700 for (proc = 0; proc < mumps->petsc_size; proc++) { 1701 for (j = 0; j < nrhs; j++) { 1702 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1703 } 1704 } 1705 1706 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1707 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1708 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1709 } else { 1710 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1711 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1712 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1713 } 1714 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1715 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1716 PetscCall(ISDestroy(&is_to)); 1717 PetscCall(ISDestroy(&is_from)); 1718 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1719 1720 if (!mumps->myid) { /* define rhs on the host */ 1721 PetscCall(VecGetArray(b_seq, &bray)); 1722 mumps->id.rhs = (MumpsScalar *)bray; 1723 PetscCall(VecRestoreArray(b_seq, &bray)); 1724 } 1725 } 1726 } else { /* sparse B */ 1727 b = (Mat_MPIAIJ *)Bt->data; 1728 1729 /* wrap dense X into a vector v_mpi */ 1730 PetscCall(MatGetLocalSize(X, &m, NULL)); 1731 PetscCall(MatDenseGetArray(X, &bray)); 1732 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1733 PetscCall(MatDenseRestoreArray(X, &bray)); 1734 1735 if (!mumps->myid) { 1736 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1737 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1738 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1739 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1740 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1741 } else { 1742 mumps->id.irhs_ptr = NULL; 1743 mumps->id.irhs_sparse = NULL; 1744 mumps->id.nz_rhs = 0; 1745 mumps->id.rhs_sparse = NULL; 1746 } 1747 } 1748 1749 /* solve phase */ 1750 mumps->id.job = JOB_SOLVE; 1751 PetscMUMPS_c(mumps); 1752 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1753 1754 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1755 PetscCall(MatDenseGetArray(X, &array)); 1756 PetscCall(VecPlaceArray(v_mpi, array)); 1757 1758 /* create scatter scat_sol */ 1759 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1760 /* iidx: index for scatter mumps solution to petsc X */ 1761 1762 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1763 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1764 for (i = 0; i < lsol_loc; i++) { 1765 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1766 1767 for (proc = 0; proc < mumps->petsc_size; proc++) { 1768 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1769 myrstart = rstart[proc]; 1770 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1771 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1772 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1773 break; 1774 } 1775 } 1776 1777 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1778 } 1779 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1780 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1781 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1782 PetscCall(ISDestroy(&is_from)); 1783 PetscCall(ISDestroy(&is_to)); 1784 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1785 PetscCall(MatDenseRestoreArray(X, &array)); 1786 1787 /* free spaces */ 1788 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1789 mumps->id.isol_loc = isol_loc_save; 1790 1791 PetscCall(PetscFree2(sol_loc, isol_loc)); 1792 PetscCall(PetscFree(idxx)); 1793 PetscCall(VecDestroy(&msol_loc)); 1794 PetscCall(VecDestroy(&v_mpi)); 1795 if (!denseB) { 1796 if (!mumps->myid) { 1797 b = (Mat_MPIAIJ *)Bt->data; 1798 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1799 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1800 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1801 } 1802 } else { 1803 if (mumps->ICNTL20 == 0) { 1804 PetscCall(VecDestroy(&b_seq)); 1805 PetscCall(VecScatterDestroy(&scat_rhs)); 1806 } 1807 } 1808 PetscCall(VecScatterDestroy(&scat_sol)); 1809 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1810 PetscFunctionReturn(PETSC_SUCCESS); 1811 } 1812 1813 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1814 { 1815 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1816 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1817 1818 PetscFunctionBegin; 1819 mumps->id.ICNTL(9) = 0; 1820 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1821 mumps->id.ICNTL(9) = value; 1822 PetscFunctionReturn(PETSC_SUCCESS); 1823 } 1824 1825 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1826 { 1827 PetscBool flg; 1828 Mat B; 1829 1830 PetscFunctionBegin; 1831 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1832 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1833 1834 /* Create B=Bt^T that uses Bt's data structure */ 1835 PetscCall(MatCreateTranspose(Bt, &B)); 1836 1837 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1838 PetscCall(MatDestroy(&B)); 1839 PetscFunctionReturn(PETSC_SUCCESS); 1840 } 1841 1842 #if !defined(PETSC_USE_COMPLEX) 1843 /* 1844 input: 1845 F: numeric factor 1846 output: 1847 nneg: total number of negative pivots 1848 nzero: total number of zero pivots 1849 npos: (global dimension of F) - nneg - nzero 1850 */ 1851 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1852 { 1853 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1854 PetscMPIInt size; 1855 1856 PetscFunctionBegin; 1857 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1858 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1859 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1860 1861 if (nneg) *nneg = mumps->id.INFOG(12); 1862 if (nzero || npos) { 1863 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1864 if (nzero) *nzero = mumps->id.INFOG(28); 1865 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1866 } 1867 PetscFunctionReturn(PETSC_SUCCESS); 1868 } 1869 #endif 1870 1871 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1872 { 1873 PetscInt i, nreqs; 1874 PetscMUMPSInt *irn, *jcn; 1875 PetscMPIInt count; 1876 PetscInt64 totnnz, remain; 1877 const PetscInt osize = mumps->omp_comm_size; 1878 PetscScalar *val; 1879 1880 PetscFunctionBegin; 1881 if (osize > 1) { 1882 if (reuse == MAT_INITIAL_MATRIX) { 1883 /* master first gathers counts of nonzeros to receive */ 1884 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1885 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1886 1887 /* Then each computes number of send/recvs */ 1888 if (mumps->is_omp_master) { 1889 /* Start from 1 since self communication is not done in MPI */ 1890 nreqs = 0; 1891 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1892 } else { 1893 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1894 } 1895 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1896 1897 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1898 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1899 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1900 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
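     For example (purely illustrative sizes): if a rank owns nnz = 5e9 triplets and PETSC_MPI_INT_MAX = 2^31-1, the loops below post three messages per array (irn, jcn, val): two carrying 2^31-1 entries each and a last one carrying the remaining 5e9 - 2*(2^31-1) = 705032706 entries, with the master posting matching receives.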
1901 */ 1902 nreqs = 0; /* counter for actual send/recvs */ 1903 if (mumps->is_omp_master) { 1904 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1905 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1906 PetscCall(PetscMalloc1(totnnz, &val)); 1907 1908 /* Self communication */ 1909 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1910 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1911 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1912 1913 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1914 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1915 PetscCall(PetscFree(mumps->val_alloc)); 1916 mumps->nnz = totnnz; 1917 mumps->irn = irn; 1918 mumps->jcn = jcn; 1919 mumps->val = mumps->val_alloc = val; 1920 1921 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1922 jcn += mumps->recvcount[0]; 1923 val += mumps->recvcount[0]; 1924 1925 /* Remote communication */ 1926 for (i = 1; i < osize; i++) { 1927 count = PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 1928 remain = mumps->recvcount[i] - count; 1929 while (count > 0) { 1930 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1931 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1932 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1933 irn += count; 1934 jcn += count; 1935 val += count; 1936 count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1937 remain -= count; 1938 } 1939 } 1940 } else { 1941 irn = mumps->irn; 1942 jcn = mumps->jcn; 1943 val = mumps->val; 1944 count = PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 1945 remain = mumps->nnz - count; 1946 while (count > 0) { 1947 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1948 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1949 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1950 irn += count; 1951 jcn += count; 1952 val += count; 1953 count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1954 remain -= count; 1955 } 1956 } 1957 } else { 1958 nreqs = 0; 1959 if (mumps->is_omp_master) { 1960 val = mumps->val + mumps->recvcount[0]; 1961 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1962 count = PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 1963 remain = mumps->recvcount[i] - count; 1964 while (count > 0) { 1965 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1966 val += count; 1967 count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1968 remain -= count; 1969 } 1970 } 1971 } else { 1972 val = mumps->val; 1973 count = PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 1974 remain = mumps->nnz - count; 1975 while (count > 0) { 1976 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1977 val += count; 1978 count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1979 remain -= count; 1980 } 1981 } 1982 } 1983 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1984 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1985 } 1986 
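  /* When omp_comm_size == 1 (the common case, without -mat_mumps_use_omp_threads), there is nothing to gather: each PETSc rank is itself a MUMPS rank and passes its own (irn, jcn, val) triplets to MUMPS directly, so this routine returns without doing any communication. */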
PetscFunctionReturn(PETSC_SUCCESS);
1987 }
1988
1989 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
1990 {
1991 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1992 PetscBool isMPIAIJ;
1993
1994 PetscFunctionBegin;
1995 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
1996 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1997 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1998 PetscFunctionReturn(PETSC_SUCCESS);
1999 }
2000
2001 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2002 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2003
2004 /* numerical factorization phase */
2005 mumps->id.job = JOB_FACTNUMERIC;
2006 if (!mumps->id.ICNTL(18)) { /* A is centralized */
2007 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
2008 } else {
2009 mumps->id.a_loc = (MumpsScalar *)mumps->val;
2010 }
2011 PetscMUMPS_c(mumps);
2012 if (mumps->id.INFOG(1) < 0) {
2013 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2014 if (mumps->id.INFOG(1) == -10) {
2015 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2016 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2017 } else if (mumps->id.INFOG(1) == -13) {
2018 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2019 F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2020 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2021 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2022 F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2023 } else {
2024 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2025 F->factorerrortype = MAT_FACTOR_OTHER;
2026 }
2027 }
2028 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));
2029
2030 F->assembled = PETSC_TRUE;
2031
2032 if (F->schur) { /* reset Schur status to unfactored */
2033 #if defined(PETSC_HAVE_CUDA)
2034 F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2035 #endif
2036 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2037 mumps->id.ICNTL(19) = 2;
2038 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2039 }
2040 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2041 }
2042
2043 /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
2044 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2045
2046 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2047 if (mumps->petsc_size > 1) {
2048 PetscInt lsol_loc;
2049 PetscScalar *sol_loc;
2050
2051
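    /* In parallel we run MUMPS with ICNTL(21)=1, so the solution comes back distributed: INFO(23) is the number of solution entries held locally, and the sol_loc/isol_loc buffers allocated below are wrapped in the sequential vector x_seq that MatSolve_MUMPS() later scatters into the PETSc vector layout. */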
PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 2052 2053 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2054 if (mumps->x_seq) { 2055 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2056 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2057 PetscCall(VecDestroy(&mumps->x_seq)); 2058 } 2059 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2060 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2061 mumps->id.lsol_loc = lsol_loc; 2062 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2063 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2064 } 2065 PetscCall(PetscLogFlops((double)mumps->id.RINFO(2))); 2066 PetscFunctionReturn(PETSC_SUCCESS); 2067 } 2068 2069 /* Sets MUMPS options from the options database */ 2070 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2071 { 2072 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2073 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2074 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2075 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2076 MumpsScalar *arr; 2077 2078 PetscFunctionBegin; 2079 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2080 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2081 PetscInt nthreads = 0; 2082 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2083 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2084 2085 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2086 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2087 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2088 2089 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2090 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2091 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2092 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2093 if (mumps->use_petsc_omp_support) { 2094 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2095 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2096 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 2097 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2098 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2099 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2100 #endif 2101 } else { 2102 mumps->omp_comm = PETSC_COMM_SELF; 2103 mumps->mumps_comm = mumps->petsc_comm; 2104 mumps->is_omp_master = PETSC_TRUE; 2105 } 2106 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2107 mumps->reqs = NULL; 2108 mumps->tag = 0; 2109 2110 if (mumps->mumps_comm != MPI_COMM_NULL) { 2111 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2112 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2113 MPI_Comm comm; 2114 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2115 mumps->mumps_comm = comm; 2116 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2117 } 2118 2119 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2120 mumps->id.job = JOB_INIT; 2121 mumps->id.par = 1; /* host participates factorizaton and solve */ 2122 mumps->id.sym = mumps->sym; 2123 2124 size = mumps->id.size_schur; 2125 arr = mumps->id.schur; 2126 listvar_schur = mumps->id.listvar_schur; 2127 PetscMUMPS_c(mumps); 2128 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2129 2130 /* set PETSc-MUMPS default options - override MUMPS default */ 2131 mumps->id.ICNTL(3) = 0; 2132 mumps->id.ICNTL(4) = 0; 2133 if (mumps->petsc_size == 1) { 2134 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2135 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2136 } else { 2137 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2138 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2139 } 2140 2141 /* restore cached ICNTL and CNTL values */ 2142 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2143 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2144 PetscCall(PetscFree(mumps->ICNTL_pre)); 2145 PetscCall(PetscFree(mumps->CNTL_pre)); 2146 2147 if (schur) { 2148 mumps->id.size_schur = size; 2149 mumps->id.schur_lld = size; 2150 mumps->id.schur = arr; 2151 mumps->id.listvar_schur = listvar_schur; 2152 if (mumps->petsc_size > 1) { 2153 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2154 2155 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2156 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2157 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2158 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2159 } else { 2160 if (F->factortype == MAT_FACTOR_LU) { 2161 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2162 } else { 2163 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2164 } 2165 } 2166 mumps->id.ICNTL(26) = -1; 2167 } 2168 2169 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 
2170 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 2171 */ 2172 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2173 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2174 2175 mumps->scat_rhs = NULL; 2176 mumps->scat_sol = NULL; 2177 } 2178 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2179 if (flg) mumps->id.ICNTL(1) = icntl; 2180 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2181 if (flg) mumps->id.ICNTL(2) = icntl; 2182 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2183 if (flg) mumps->id.ICNTL(3) = icntl; 2184 2185 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2186 if (flg) mumps->id.ICNTL(4) = icntl; 2187 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2188 2189 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2190 if (flg) mumps->id.ICNTL(6) = icntl; 2191 2192 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2193 if (flg) { 2194 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2195 mumps->id.ICNTL(7) = icntl; 2196 } 2197 2198 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2199 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2200 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2201 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2202 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2203 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2204 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2205 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2206 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2207 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), 
&flg)); 2208 if (flg) { 2209 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2210 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2211 } 2212 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2213 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2214 PetscCall(MatDestroy(&F->schur)); 2215 PetscCall(MatMumpsResetSchur_Private(mumps)); 2216 } 2217 2218 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2219 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2220 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2221 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2222 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2223 In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 2224 */ 2225 #if PETSC_PKG_MUMPS_VERSION_GE(5, 6, 2) && defined(PETSC_HAVE_MUMPS_AVOID_MPI_IN_PLACE) 2226 mumps->ICNTL20 = 10; 2227 #elif PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2228 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 2229 #else 2230 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 2231 #endif 2232 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2233 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10", (int)mumps->ICNTL20);
2234 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2235 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2236 #endif
2237 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vectors */
2238
2239 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2240 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2241 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2242 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
2243
2244 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2245 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix has been defined", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2246 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2247 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2248 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2249 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2250 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2251 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */
2252 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2253 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2254 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2255 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2256
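  /* ICNTL(35)/(36)/(38) above and -mat_mumps_cntl_7 below together control MUMPS Block Low Rank (BLR) compression; as an illustrative (not recommended) setting, -mat_mumps_icntl_35 1 -mat_mumps_cntl_7 1e-8 would activate a BLR factorization with dropping tolerance 1e-8. */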
PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2257 2258 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2259 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2260 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2261 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2262 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2263 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2264 2265 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2266 2267 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2268 if (ninfo) { 2269 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2270 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2271 mumps->ninfo = ninfo; 2272 for (i = 0; i < ninfo; i++) { 2273 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2274 mumps->info[i] = info[i]; 2275 } 2276 } 2277 PetscOptionsEnd(); 2278 PetscFunctionReturn(PETSC_SUCCESS); 2279 } 2280 2281 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2282 { 2283 PetscFunctionBegin; 2284 if (mumps->id.INFOG(1) < 0) { 2285 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2286 if (mumps->id.INFOG(1) == -6) { 2287 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2288 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2289 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2290 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2291 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2292 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2293 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2294 } else { 2295 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2296 F->factorerrortype = MAT_FACTOR_OTHER; 2297 } 2298 } 2299 PetscFunctionReturn(PETSC_SUCCESS); 2300 } 2301 2302 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2303 { 2304 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2305 Vec b; 2306 const PetscInt M = A->rmap->N; 2307 2308 PetscFunctionBegin; 2309 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2310 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2311 
PetscFunctionReturn(PETSC_SUCCESS); 2312 } 2313 2314 /* Set MUMPS options from the options database */ 2315 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2316 2317 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2318 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2319 2320 /* analysis phase */ 2321 mumps->id.job = JOB_FACTSYMBOLIC; 2322 mumps->id.n = M; 2323 switch (mumps->id.ICNTL(18)) { 2324 case 0: /* centralized assembled matrix input */ 2325 if (!mumps->myid) { 2326 mumps->id.nnz = mumps->nnz; 2327 mumps->id.irn = mumps->irn; 2328 mumps->id.jcn = mumps->jcn; 2329 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2330 if (r && mumps->id.ICNTL(7) == 7) { 2331 mumps->id.ICNTL(7) = 1; 2332 if (!mumps->myid) { 2333 const PetscInt *idx; 2334 PetscInt i; 2335 2336 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2337 PetscCall(ISGetIndices(r, &idx)); 2338 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2339 PetscCall(ISRestoreIndices(r, &idx)); 2340 } 2341 } 2342 } 2343 break; 2344 case 3: /* distributed assembled matrix input (size>1) */ 2345 mumps->id.nnz_loc = mumps->nnz; 2346 mumps->id.irn_loc = mumps->irn; 2347 mumps->id.jcn_loc = mumps->jcn; 2348 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2349 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2350 PetscCall(MatCreateVecs(A, NULL, &b)); 2351 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2352 PetscCall(VecDestroy(&b)); 2353 } 2354 break; 2355 } 2356 PetscMUMPS_c(mumps); 2357 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2358 2359 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2360 F->ops->solve = MatSolve_MUMPS; 2361 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2362 F->ops->matsolve = MatMatSolve_MUMPS; 2363 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2364 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2365 2366 mumps->matstruc = SAME_NONZERO_PATTERN; 2367 PetscFunctionReturn(PETSC_SUCCESS); 2368 } 2369 2370 /* Note the Petsc r and c permutations are ignored */ 2371 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2372 { 2373 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2374 Vec b; 2375 const PetscInt M = A->rmap->N; 2376 2377 PetscFunctionBegin; 2378 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2379 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2380 PetscFunctionReturn(PETSC_SUCCESS); 2381 } 2382 2383 /* Set MUMPS options from the options database */ 2384 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2385 2386 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2387 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2388 2389 /* analysis phase */ 2390 mumps->id.job = JOB_FACTSYMBOLIC; 2391 mumps->id.n = M; 2392 switch (mumps->id.ICNTL(18)) { 2393 case 0: /* centralized assembled matrix input */ 2394 if (!mumps->myid) { 2395 mumps->id.nnz = mumps->nnz; 2396 mumps->id.irn = mumps->irn; 2397 mumps->id.jcn = mumps->jcn; 2398 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2399 } 2400 break; 2401 case 3: /* distributed assembled matrix input (size>1) */ 2402 mumps->id.nnz_loc = mumps->nnz; 2403 mumps->id.irn_loc = mumps->irn; 2404 mumps->id.jcn_loc = mumps->jcn; 2405 if 
(mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2406 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2407 PetscCall(MatCreateVecs(A, NULL, &b)); 2408 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2409 PetscCall(VecDestroy(&b)); 2410 } 2411 break; 2412 } 2413 PetscMUMPS_c(mumps); 2414 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2415 2416 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2417 F->ops->solve = MatSolve_MUMPS; 2418 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2419 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2420 2421 mumps->matstruc = SAME_NONZERO_PATTERN; 2422 PetscFunctionReturn(PETSC_SUCCESS); 2423 } 2424 2425 /* Note the Petsc r permutation and factor info are ignored */ 2426 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info) 2427 { 2428 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2429 Vec b; 2430 const PetscInt M = A->rmap->N; 2431 2432 PetscFunctionBegin; 2433 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2434 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2435 PetscFunctionReturn(PETSC_SUCCESS); 2436 } 2437 2438 /* Set MUMPS options from the options database */ 2439 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2440 2441 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2442 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2443 2444 /* analysis phase */ 2445 mumps->id.job = JOB_FACTSYMBOLIC; 2446 mumps->id.n = M; 2447 switch (mumps->id.ICNTL(18)) { 2448 case 0: /* centralized assembled matrix input */ 2449 if (!mumps->myid) { 2450 mumps->id.nnz = mumps->nnz; 2451 mumps->id.irn = mumps->irn; 2452 mumps->id.jcn = mumps->jcn; 2453 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2454 } 2455 break; 2456 case 3: /* distributed assembled matrix input (size>1) */ 2457 mumps->id.nnz_loc = mumps->nnz; 2458 mumps->id.irn_loc = mumps->irn; 2459 mumps->id.jcn_loc = mumps->jcn; 2460 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2461 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2462 PetscCall(MatCreateVecs(A, NULL, &b)); 2463 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2464 PetscCall(VecDestroy(&b)); 2465 } 2466 break; 2467 } 2468 PetscMUMPS_c(mumps); 2469 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2470 2471 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2472 F->ops->solve = MatSolve_MUMPS; 2473 F->ops->solvetranspose = MatSolve_MUMPS; 2474 F->ops->matsolve = MatMatSolve_MUMPS; 2475 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2476 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2477 #if defined(PETSC_USE_COMPLEX) 2478 F->ops->getinertia = NULL; 2479 #else 2480 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2481 #endif 2482 2483 mumps->matstruc = SAME_NONZERO_PATTERN; 2484 PetscFunctionReturn(PETSC_SUCCESS); 2485 } 2486 2487 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2488 { 2489 PetscBool iascii; 2490 PetscViewerFormat format; 2491 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2492 2493 PetscFunctionBegin; 2494 /* check if matrix is mumps type */ 2495 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2496 2497 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2498 if (iascii) { 2499 PetscCall(PetscViewerGetFormat(viewer, &format)); 2500 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2501 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2502 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2503 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2504 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2505 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2506 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2507 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2508 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2509 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2510 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2511 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2512 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2513 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2514 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2515 if (mumps->id.ICNTL(11) > 0) { 2516 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)mumps->id.RINFOG(4))); 2517 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)mumps->id.RINFOG(5))); 2518 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)mumps->id.RINFOG(6))); 2519 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)mumps->id.RINFOG(7), (double)mumps->id.RINFOG(8))); 2520 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", 
(double)mumps->id.RINFOG(9))); 2521 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)mumps->id.RINFOG(10), (double)mumps->id.RINFOG(11))); 2522 } 2523 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2524 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2525 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2526 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2527 /* ICNTL(15-17) not used */ 2528 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2529 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2530 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2531 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2532 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2533 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2534 2535 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2536 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2537 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2538 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2539 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2540 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2541 2542 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2543 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2544 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2545 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2546 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2547 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2548 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2549 2550 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)mumps->id.CNTL(1))); 2551 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)mumps->id.CNTL(2))); 2552 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)mumps->id.CNTL(3))); 2553 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)mumps->id.CNTL(4))); 2554 
PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)mumps->id.CNTL(5))); 2555 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)mumps->id.CNTL(7))); 2556 2557 /* information local to each processor */ 2558 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2559 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2560 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(1))); 2561 PetscCall(PetscViewerFlush(viewer)); 2562 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2563 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(2))); 2564 PetscCall(PetscViewerFlush(viewer)); 2565 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2566 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(3))); 2567 PetscCall(PetscViewerFlush(viewer)); 2568 2569 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2570 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2571 PetscCall(PetscViewerFlush(viewer)); 2572 2573 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2574 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2575 PetscCall(PetscViewerFlush(viewer)); 2576 2577 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2578 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2579 PetscCall(PetscViewerFlush(viewer)); 2580 2581 if (mumps->ninfo && mumps->ninfo <= 80) { 2582 PetscInt i; 2583 for (i = 0; i < mumps->ninfo; i++) { 2584 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2585 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2586 PetscCall(PetscViewerFlush(viewer)); 2587 } 2588 } 2589 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2590 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2591 2592 if (mumps->myid == 0) { /* information from the host */ 2593 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)mumps->id.RINFOG(1))); 2594 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)mumps->id.RINFOG(2))); 2595 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)mumps->id.RINFOG(3))); 2596 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)mumps->id.RINFOG(12), (double)mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2597 2598 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2599 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2600 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2601 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2602 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2603 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2604 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2605 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2606 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2607 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2608 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2609 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14))); 2610 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2611 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2612 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2613 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2614 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2615 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the 
factors): %d\n", mumps->id.INFOG(20))); 2616 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2617 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2618 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2619 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2620 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2621 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2622 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2623 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2624 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2625 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2626 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2627 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2628 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2629 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2630 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2631 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2632 } 2633 } 2634 } 2635 PetscFunctionReturn(PETSC_SUCCESS); 2636 } 2637 2638 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info) 2639 { 2640 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2641 2642 PetscFunctionBegin; 2643 info->block_size = 1.0; 2644 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2645 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2646 info->nz_unneeded = 0.0; 2647 info->assemblies = 0.0; 2648 info->mallocs = 0.0; 2649 info->memory = 0.0; 2650 info->fill_ratio_given = 0; 2651 info->fill_ratio_needed = 0; 2652 info->factor_mallocs = 0; 2653 PetscFunctionReturn(PETSC_SUCCESS); 2654 } 2655 2656 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2657 { 2658 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2659 const PetscScalar *arr; 2660 const PetscInt *idxs; 2661 PetscInt size, i; 2662 2663 PetscFunctionBegin; 2664 PetscCall(ISGetLocalSize(is, &size)); 2665 /* Schur complement matrix */ 2666 PetscCall(MatDestroy(&F->schur)); 2667 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2668 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2669 mumps->id.schur = (MumpsScalar *)arr; 2670 mumps->id.size_schur = size; 2671 mumps->id.schur_lld = size; 2672 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2673 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2674 2675 /* MUMPS expects Fortran style indices */ 2676 PetscCall(PetscFree(mumps->id.listvar_schur)); 2677 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2678 PetscCall(ISGetIndices(is, &idxs)); 2679 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2680 PetscCall(ISRestoreIndices(is, &idxs)); 2681 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 2682 mumps->id.ICNTL(26) = -1; 2683 PetscFunctionReturn(PETSC_SUCCESS); 2684 } 2685 2686 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2687 { 2688 Mat St; 2689 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2690 PetscScalar *array; 2691 2692 PetscFunctionBegin; 2693 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it"); 2694 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2695 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2696 PetscCall(MatSetType(St, MATDENSE)); 2697 PetscCall(MatSetUp(St)); 2698 PetscCall(MatDenseGetArray(St, &array)); 2699 if (!mumps->sym) { /* MUMPS always returns a full matrix */ 2700 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2701 PetscInt i, j, N = mumps->id.size_schur; 2702 for (i = 0; i < N; i++) { 2703 for (j = 0; j < N; j++) { 2704 #if !defined(PETSC_USE_COMPLEX) 2705 PetscScalar val = mumps->id.schur[i * N + j]; 2706 #else 2707 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2708 #endif 2709 array[j * N + i] = val; 2710 } 2711 } 2712 } else { /* stored by columns */ 2713 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2714 } 2715 } else { /* either full or lower-triangular (not packed) */ 2716 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2717 PetscInt i, j, N = mumps->id.size_schur; 2718 for (i = 0; i < N; i++) { 2719 for (j = i; j < N; j++) { 2720 #if !defined(PETSC_USE_COMPLEX) 2721 PetscScalar val = mumps->id.schur[i * N + j]; 2722 #else 2723 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2724 #endif 2725 array[i * N + j] = array[j * N + i] = val; 2726 } 2727 } 2728 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2729 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2730 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2731 PetscInt i, j, N = mumps->id.size_schur; 2732 for (i = 0; i < N; i++) { 2733 for (j = 0; j < i + 1; j++) { 2734 #if !defined(PETSC_USE_COMPLEX) 2735 PetscScalar val = mumps->id.schur[i * N + j]; 2736 #else 2737 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2738 #endif 2739 array[i * N + j] = array[j * N + i] = val; 2740 } 2741 } 2742 } 2743 } 2744 PetscCall(MatDenseRestoreArray(St, &array)); 2745 *S = St; 2746 PetscFunctionReturn(PETSC_SUCCESS); 2747 } 2748 2749 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2750 { 2751 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2752 2753 PetscFunctionBegin; 2754 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called; ICNTL_pre stores [count, icntl_1, ival_1, icntl_2, ival_2, ...] */ 2755 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2756 for (i = 0; i < nICNTL_pre; ++i) 2757 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2758 if (i == nICNTL_pre) { /* not already cached */ 2759 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2760 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2761 mumps->ICNTL_pre[0]++; 2762 } 2763 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2764 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2765 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2766 PetscFunctionReturn(PETSC_SUCCESS); 2767 } 2768 2769 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2770 { 2771 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2772 2773 PetscFunctionBegin; 2774 if (mumps->id.job == JOB_NULL) { 2775 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 2776 *ival = 0; 2777 for (i = 0; i < nICNTL_pre; ++i) { 2778 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2779 } 2780 } else *ival = mumps->id.ICNTL(icntl); 2781 PetscFunctionReturn(PETSC_SUCCESS); 2782 } 2783 2784 /*@ 2785 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 2786 2787 Logically Collective 2788 2789 Input Parameters: 2790 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2791 . icntl - index of MUMPS parameter array ICNTL() 2792 - ival - value of MUMPS ICNTL(icntl) 2793 2794 Options Database Key: 2795 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2796 2797 Level: beginner 2798 2799 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2800 @*/ 2801 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2802 { 2803 PetscFunctionBegin; 2804 PetscValidType(F, 1); 2805 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2806 PetscValidLogicalCollectiveInt(F, icntl, 2); 2807 PetscValidLogicalCollectiveInt(F, ival, 3); 2808 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2809 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2810 PetscFunctionReturn(PETSC_SUCCESS); 2811 } 2812 2813 /*@ 2814 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 2815 2816 Logically Collective 2817 2818 Input Parameters: 2819 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2820 - icntl - index of MUMPS parameter array ICNTL() 2821 2822 Output Parameter: 2823 . ival - value of MUMPS ICNTL(icntl) 2824 2825 Level: beginner 2826 2827 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2828 @*/ 2829 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2830 { 2831 PetscFunctionBegin; 2832 PetscValidType(F, 1); 2833 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2834 PetscValidLogicalCollectiveInt(F, icntl, 2); 2835 PetscAssertPointer(ival, 3); 2836 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2837 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2838 PetscFunctionReturn(PETSC_SUCCESS); 2839 } 2840 2841 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2842 { 2843 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2844 2845 PetscFunctionBegin; 2846 if (mumps->id.job == JOB_NULL) { 2847 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 2848 for (i = 0; i < nCNTL_pre; ++i) 2849 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2850 if (i == nCNTL_pre) { 2851 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2852 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2853 mumps->CNTL_pre[0]++; 2854 } 2855 mumps->CNTL_pre[1 + 2 * i] = icntl; 2856 mumps->CNTL_pre[2 + 2 * i] = val; 2857 } else mumps->id.CNTL(icntl) = val; 2858 PetscFunctionReturn(PETSC_SUCCESS); 2859 } 2860 2861 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2862 { 2863 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2864 2865 PetscFunctionBegin; 2866 if (mumps->id.job == JOB_NULL) { 2867 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2868 *val = 0.0; 2869 for (i = 0; i < nCNTL_pre; ++i) { 2870 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2871 } 2872 } else *val = mumps->id.CNTL(icntl); 2873 PetscFunctionReturn(PETSC_SUCCESS); 2874 } 2875 2876 /*@ 2877 MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 2878 2879 Logically Collective 2880 2881 Input Parameters: 2882 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2883 . icntl - index of MUMPS parameter array CNTL() 2884 - val - value of MUMPS CNTL(icntl) 2885 2886 Options Database Key: 2887 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2888 2889 Level: beginner 2890 2891 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2892 @*/ 2893 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2894 { 2895 PetscFunctionBegin; 2896 PetscValidType(F, 1); 2897 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2898 PetscValidLogicalCollectiveInt(F, icntl, 2); 2899 PetscValidLogicalCollectiveReal(F, val, 3); 2900 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2901 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2902 PetscFunctionReturn(PETSC_SUCCESS); 2903 } 2904 2905 /*@ 2906 MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 2907 2908 Logically Collective 2909 2910 Input Parameters: 2911 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2912 - icntl - index of MUMPS parameter array CNTL() 2913 2914 Output Parameter: 2915 . 
val - value of MUMPS CNTL(icntl) 2916 2917 Level: beginner 2918 2919 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2920 @*/ 2921 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2922 { 2923 PetscFunctionBegin; 2924 PetscValidType(F, 1); 2925 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2926 PetscValidLogicalCollectiveInt(F, icntl, 2); 2927 PetscAssertPointer(val, 3); 2928 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2929 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2930 PetscFunctionReturn(PETSC_SUCCESS); 2931 } 2932 2933 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2934 { 2935 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2936 2937 PetscFunctionBegin; 2938 *info = mumps->id.INFO(icntl); 2939 PetscFunctionReturn(PETSC_SUCCESS); 2940 } 2941 2942 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2943 { 2944 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2945 2946 PetscFunctionBegin; 2947 *infog = mumps->id.INFOG(icntl); 2948 PetscFunctionReturn(PETSC_SUCCESS); 2949 } 2950 2951 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2952 { 2953 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2954 2955 PetscFunctionBegin; 2956 *rinfo = mumps->id.RINFO(icntl); 2957 PetscFunctionReturn(PETSC_SUCCESS); 2958 } 2959 2960 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2961 { 2962 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2963 2964 PetscFunctionBegin; 2965 *rinfog = mumps->id.RINFOG(icntl); 2966 PetscFunctionReturn(PETSC_SUCCESS); 2967 } 2968 2969 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2970 { 2971 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2972 2973 PetscFunctionBegin; 2974 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2975 *size = 0; 2976 *array = NULL; 2977 if (!mumps->myid) { 2978 *size = mumps->id.INFOG(28); 2979 PetscCall(PetscMalloc1(*size, array)); 2980 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2981 } 2982 PetscFunctionReturn(PETSC_SUCCESS); 2983 } 2984 2985 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2986 { 2987 Mat Bt = NULL, Btseq = NULL; 2988 PetscBool flg; 2989 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2990 PetscScalar *aa; 2991 PetscInt spnr, *ia, *ja, M, nrhs; 2992 2993 PetscFunctionBegin; 2994 PetscAssertPointer(spRHS, 2); 2995 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2996 if (flg) { 2997 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2998 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2999 3000 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 3001 3002 if (mumps->petsc_size > 1) { 3003 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 3004 Btseq = b->A; 3005 } else { 3006 Btseq = Bt; 3007 } 3008 3009 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 3010 mumps->id.nrhs = nrhs; 3011 mumps->id.lrhs = M; 3012 mumps->id.rhs = NULL; 3013 3014 if (!mumps->myid) { 3015 PetscCall(MatSeqAIJGetArray(Btseq, 
&aa)); 3016 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3017 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3018 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3019 mumps->id.rhs_sparse = (MumpsScalar *)aa; 3020 } else { 3021 mumps->id.irhs_ptr = NULL; 3022 mumps->id.irhs_sparse = NULL; 3023 mumps->id.nz_rhs = 0; 3024 mumps->id.rhs_sparse = NULL; 3025 } 3026 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3027 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3028 3029 /* solve phase */ 3030 mumps->id.job = JOB_SOLVE; 3031 PetscMUMPS_c(mumps); 3032 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 3033 3034 if (!mumps->myid) { 3035 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3036 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3037 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3038 } 3039 PetscFunctionReturn(PETSC_SUCCESS); 3040 } 3041 3042 /*@ 3043 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc> 3044 3045 Logically Collective 3046 3047 Input Parameter: 3048 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3049 3050 Output Parameter: 3051 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3052 3053 Level: beginner 3054 3055 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3056 @*/ 3057 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3058 { 3059 PetscFunctionBegin; 3060 PetscValidType(F, 1); 3061 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3062 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3063 PetscFunctionReturn(PETSC_SUCCESS); 3064 } 3065 3066 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3067 { 3068 Mat spRHS; 3069 3070 PetscFunctionBegin; 3071 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3072 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3073 PetscCall(MatDestroy(&spRHS)); 3074 PetscFunctionReturn(PETSC_SUCCESS); 3075 } 3076 3077 /*@ 3078 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc> 3079 3080 Logically Collective 3081 3082 Input Parameter: 3083 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3084 3085 Output Parameter: 3086 . 
spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3087 3088 Level: beginner 3089 3090 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3091 @*/ 3092 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3093 { 3094 PetscBool flg; 3095 3096 PetscFunctionBegin; 3097 PetscValidType(F, 1); 3098 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3099 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3100 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3101 3102 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3103 PetscFunctionReturn(PETSC_SUCCESS); 3104 } 3105 3106 /*@ 3107 MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc> 3108 3109 Logically Collective 3110 3111 Input Parameters: 3112 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3113 - icntl - index of MUMPS parameter array INFO() 3114 3115 Output Parameter: 3116 . ival - value of MUMPS INFO(icntl) 3117 3118 Level: beginner 3119 3120 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3121 @*/ 3122 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3123 { 3124 PetscFunctionBegin; 3125 PetscValidType(F, 1); 3126 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3127 PetscAssertPointer(ival, 3); 3128 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3129 PetscFunctionReturn(PETSC_SUCCESS); 3130 } 3131 3132 /*@ 3133 MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc> 3134 3135 Logically Collective 3136 3137 Input Parameters: 3138 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3139 - icntl - index of MUMPS parameter array INFOG() 3140 3141 Output Parameter: 3142 . ival - value of MUMPS INFOG(icntl) 3143 3144 Level: beginner 3145 3146 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3147 @*/ 3148 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3149 { 3150 PetscFunctionBegin; 3151 PetscValidType(F, 1); 3152 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3153 PetscAssertPointer(ival, 3); 3154 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3155 PetscFunctionReturn(PETSC_SUCCESS); 3156 } 3157 3158 /*@ 3159 MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc> 3160 3161 Logically Collective 3162 3163 Input Parameters: 3164 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3165 - icntl - index of MUMPS parameter array RINFO() 3166 3167 Output Parameter: 3168 . 
val - value of MUMPS RINFO(icntl) 3169 3170 Level: beginner 3171 3172 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3173 @*/ 3174 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3175 { 3176 PetscFunctionBegin; 3177 PetscValidType(F, 1); 3178 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3179 PetscAssertPointer(val, 3); 3180 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3181 PetscFunctionReturn(PETSC_SUCCESS); 3182 } 3183 3184 /*@ 3185 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc> 3186 3187 Logically Collective 3188 3189 Input Parameters: 3190 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3191 - icntl - index of MUMPS parameter array RINFOG() 3192 3193 Output Parameter: 3194 . val - value of MUMPS RINFOG(icntl) 3195 3196 Level: beginner 3197 3198 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3199 @*/ 3200 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3201 { 3202 PetscFunctionBegin; 3203 PetscValidType(F, 1); 3204 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3205 PetscAssertPointer(val, 3); 3206 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3207 PetscFunctionReturn(PETSC_SUCCESS); 3208 } 3209 3210 /*@ 3211 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc> 3212 3213 Logically Collective 3214 3215 Input Parameter: 3216 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3217 3218 Output Parameters: 3219 + size - local size of the array. The size of the array is non-zero only on the host. 3220 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3221 for freeing this array. 
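  A minimal usage sketch (assuming the factorization was performed with -mat_mumps_icntl_24 1; the variable names are illustrative only) is
.vb
     PetscInt nnull, *rows;
     MatMumpsGetNullPivots(F, &nnull, &rows);  /* nnull is nonzero only on the host (rank 0) */
     /* ... use the 0-based row indices rows[0..nnull-1] ... */
     PetscFree(rows);                          /* the caller must free the returned array */
.ve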
3222 3223 Level: beginner 3224 3225 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3226 @*/ 3227 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3228 { 3229 PetscFunctionBegin; 3230 PetscValidType(F, 1); 3231 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3232 PetscAssertPointer(size, 2); 3233 PetscAssertPointer(array, 3); 3234 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3235 PetscFunctionReturn(PETSC_SUCCESS); 3236 } 3237 3238 /*MC 3239 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3240 distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc> 3241 3242 Works with `MATAIJ` and `MATSBAIJ` matrices 3243 3244 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3245 3246 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3247 See details below. 3248 3249 Use `-pc_type cholesky` or `-pc_type lu` together with `-pc_factor_mat_solver_type mumps` to use this direct solver 3250 3251 Options Database Keys: 3252 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3253 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 3254 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3255 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3256 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 3257 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3258 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3259 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3260 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3261 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3262 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3263 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3264 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3265 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3266 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3267 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3268 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3269 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor 3270 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3271 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3272 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 3273 . 
-mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 3274 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3275 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3276 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3277 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3278 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3279 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3280 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3281 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3282 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3283 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3284 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3285 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3286 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3287 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3288 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3289 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3290 3291 Level: beginner 3292 3293 Notes: 3294 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will 3295 error if the matrix is Hermitian. 3296 3297 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3298 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3299 3300 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3301 the failure with 3302 .vb 3303 KSPGetPC(ksp,&pc); 3304 PCFactorGetMatrix(pc,&mat); 3305 MatMumpsGetInfo(mat,....); 3306 MatMumpsGetInfog(mat,....); etc. 3307 .ve 3308 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3309 3310 MUMPS provides 64-bit integer support in two build modes: 3311 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3312 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3313 3314 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3315 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3316 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3317 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3318 3319 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 
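  For instance, either of the following configure sketches (most options omitted) ends up with a selective 64-bit MUMPS; only the PETSc index size differs:
.vb
     ./configure --with-64-bit-indices=0 --download-mumps --download-scalapack ...
     ./configure --with-64-bit-indices=1 --download-mumps --download-scalapack ...
.ve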
3320 3321 Two modes to run MUMPS/PETSc with OpenMP 3322 .vb 3323 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3324 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3325 .ve 3326 3327 .vb 3328 Use -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3329 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3330 .ve 3331 3332 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3333 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3334 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3335 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3336 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 3337 3338 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3339 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3340 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3341 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (which might be set 3342 by CPU binding policies in job scripts) and make them available to the master so that OMP threads spawned by MUMPS can run on these CPUs. 3343 In a multi-socket compute node, MPI rank mapping is an issue. Using the above example, suppose your compute node has two sockets: 3344 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind 3345 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and the threads it spawns) will use half the cores in socket 0 and half 3346 the cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3347 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3348 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3349 examine the mapping result. 3350 3351 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3352 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3353 calls `omp_set_num_threads`(m) internally before calling MUMPS. 
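  As a concrete sketch of the above (the launcher and binding flags are only an example and vary by system), the two-node, 32-cores-per-node scenario could be driven by
.vb
     export OMP_PROC_BIND=spread
     export OMP_PLACES=threads
     srun -N 2 -n 64 --cpu-bind=verbose -m block:block ./test -mat_mumps_use_omp_threads 16
.ve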
3354 3355 See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 3356 3357 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3358 M*/ 3359 3360 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 3361 { 3362 PetscFunctionBegin; 3363 *type = MATSOLVERMUMPS; 3364 PetscFunctionReturn(PETSC_SUCCESS); 3365 } 3366 3367 /* MatGetFactor for Seq and MPI AIJ matrices */ 3368 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3369 { 3370 Mat B; 3371 Mat_MUMPS *mumps; 3372 PetscBool isSeqAIJ, isDiag, isDense; 3373 PetscMPIInt size; 3374 3375 PetscFunctionBegin; 3376 #if defined(PETSC_USE_COMPLEX) 3377 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3378 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3379 *F = NULL; 3380 PetscFunctionReturn(PETSC_SUCCESS); 3381 } 3382 #endif 3383 /* Create the factorization matrix */ 3384 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3385 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3386 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3387 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3388 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3389 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3390 PetscCall(MatSetUp(B)); 3391 3392 PetscCall(PetscNew(&mumps)); 3393 3394 B->ops->view = MatView_MUMPS; 3395 B->ops->getinfo = MatGetInfo_MUMPS; 3396 3397 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3398 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3399 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3400 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3401 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3402 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3403 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3404 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3405 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3406 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3407 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3408 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3409 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3410 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3411 3412 if (ftype == MAT_FACTOR_LU) { 3413 B->ops->lufactorsymbolic = 
MatLUFactorSymbolic_AIJMUMPS; 3414 B->factortype = MAT_FACTOR_LU; 3415 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3416 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3417 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3418 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3419 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3420 mumps->sym = 0; 3421 } else { 3422 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3423 B->factortype = MAT_FACTOR_CHOLESKY; 3424 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3425 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3426 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3427 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3428 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3429 #if defined(PETSC_USE_COMPLEX) 3430 mumps->sym = 2; 3431 #else 3432 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3433 else mumps->sym = 2; 3434 #endif 3435 } 3436 3437 /* set solvertype */ 3438 PetscCall(PetscFree(B->solvertype)); 3439 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3440 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3441 if (size == 1) { 3442 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3443 B->canuseordering = PETSC_TRUE; 3444 } 3445 B->ops->destroy = MatDestroy_MUMPS; 3446 B->data = (void *)mumps; 3447 3448 *F = B; 3449 mumps->id.job = JOB_NULL; 3450 mumps->ICNTL_pre = NULL; 3451 mumps->CNTL_pre = NULL; 3452 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3453 PetscFunctionReturn(PETSC_SUCCESS); 3454 } 3455 3456 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3457 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 3458 { 3459 Mat B; 3460 Mat_MUMPS *mumps; 3461 PetscBool isSeqSBAIJ; 3462 PetscMPIInt size; 3463 3464 PetscFunctionBegin; 3465 #if defined(PETSC_USE_COMPLEX) 3466 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3467 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3468 *F = NULL; 3469 PetscFunctionReturn(PETSC_SUCCESS); 3470 } 3471 #endif 3472 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3473 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3474 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3475 PetscCall(MatSetUp(B)); 3476 3477 PetscCall(PetscNew(&mumps)); 3478 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3479 if (isSeqSBAIJ) { 3480 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3481 } else { 3482 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3483 } 3484 3485 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3486 B->ops->view = MatView_MUMPS; 3487 B->ops->getinfo = MatGetInfo_MUMPS; 3488 3489 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3490 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3491 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3492 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3493 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3494 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3495 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3496 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3497 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3498 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3499 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3500 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3501 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3502 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3503 3504 B->factortype = MAT_FACTOR_CHOLESKY; 3505 #if defined(PETSC_USE_COMPLEX) 3506 mumps->sym = 2; 3507 #else 3508 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3509 else mumps->sym = 2; 3510 #endif 3511 3512 /* set solvertype */ 3513 PetscCall(PetscFree(B->solvertype)); 3514 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3515 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3516 if (size == 1) { 3517 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3518 B->canuseordering = PETSC_TRUE; 3519 } 3520 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3521 B->ops->destroy = MatDestroy_MUMPS; 3522 B->data = (void *)mumps; 3523 3524 *F = B; 3525 mumps->id.job = JOB_NULL; 3526 mumps->ICNTL_pre = NULL; 3527 mumps->CNTL_pre = NULL; 3528 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3529 PetscFunctionReturn(PETSC_SUCCESS); 3530 } 3531 3532 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3533 { 3534 Mat B; 3535 Mat_MUMPS *mumps; 3536 PetscBool isSeqBAIJ; 3537 PetscMPIInt 
size; 3538 3539 PetscFunctionBegin; 3540 /* Create the factorization matrix */ 3541 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3542 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3543 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3544 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3545 PetscCall(MatSetUp(B)); 3546 3547 PetscCall(PetscNew(&mumps)); 3548 if (ftype == MAT_FACTOR_LU) { 3549 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3550 B->factortype = MAT_FACTOR_LU; 3551 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3552 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3553 mumps->sym = 0; 3554 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3555 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3556 3557 B->ops->view = MatView_MUMPS; 3558 B->ops->getinfo = MatGetInfo_MUMPS; 3559 3560 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3561 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3562 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3563 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3564 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3565 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3566 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3567 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3568 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3569 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3570 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3571 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3572 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3573 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3574 3575 /* set solvertype */ 3576 PetscCall(PetscFree(B->solvertype)); 3577 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3578 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3579 if (size == 1) { 3580 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3581 B->canuseordering = PETSC_TRUE; 3582 } 3583 B->ops->destroy = MatDestroy_MUMPS; 3584 B->data = (void *)mumps; 3585 3586 *F = B; 3587 mumps->id.job = JOB_NULL; 3588 mumps->ICNTL_pre = NULL; 3589 mumps->CNTL_pre = NULL; 3590 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3591 PetscFunctionReturn(PETSC_SUCCESS); 3592 } 3593 3594 /* MatGetFactor for Seq and MPI SELL matrices */ 3595 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3596 { 3597 Mat B; 3598 Mat_MUMPS *mumps; 3599 PetscBool isSeqSELL; 3600 
PetscMPIInt size; 3601 3602 PetscFunctionBegin; 3603 /* Create the factorization matrix */ 3604 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3605 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3606 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3607 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3608 PetscCall(MatSetUp(B)); 3609 3610 PetscCall(PetscNew(&mumps)); 3611 3612 B->ops->view = MatView_MUMPS; 3613 B->ops->getinfo = MatGetInfo_MUMPS; 3614 3615 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3616 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3617 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3618 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3619 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3620 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3621 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3622 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3623 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3624 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3625 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3626 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3627 3628 if (ftype == MAT_FACTOR_LU) { 3629 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3630 B->factortype = MAT_FACTOR_LU; 3631 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3632 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3633 mumps->sym = 0; 3634 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3635 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3636 3637 /* set solvertype */ 3638 PetscCall(PetscFree(B->solvertype)); 3639 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3640 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3641 if (size == 1) { 3642 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3643 B->canuseordering = PETSC_TRUE; 3644 } 3645 B->ops->destroy = MatDestroy_MUMPS; 3646 B->data = (void *)mumps; 3647 3648 *F = B; 3649 mumps->id.job = JOB_NULL; 3650 mumps->ICNTL_pre = NULL; 3651 mumps->CNTL_pre = NULL; 3652 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3653 PetscFunctionReturn(PETSC_SUCCESS); 3654 } 3655 3656 /* MatGetFactor for MATNEST matrices */ 3657 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3658 { 3659 Mat B, **mats; 3660 Mat_MUMPS *mumps; 3661 PetscInt nr, nc; 3662 PetscMPIInt size; 3663 PetscBool flg = PETSC_TRUE; 3664 3665 PetscFunctionBegin; 3666 #if defined(PETSC_USE_COMPLEX) 3667 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3668 PetscCall(PetscInfo(A, "Hermitian 
/* MatGetFactor for MATNEST matrices */
static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B, **mats;
  Mat_MUMPS  *mumps;
  PetscInt    nr, nc;
  PetscMPIInt size;
  PetscBool   flg = PETSC_TRUE;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif

  /* Return if some condition is not satisfied */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }
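  /*
    The checks above restrict MAT_FACTOR_CHOLESKY to a square MATNEST whose row and column index
    sets coincide block-wise and whose block-row index ranges do not overlap. A nest built as in
    the sketch below, with A00 and A11 living on disjoint index ranges, passes these checks;
    anything else should fall back to MAT_FACTOR_LU. A00, A01 and A11 are placeholders for already
    assembled matrices.

      Mat blocks[4] = {A00, A01, NULL, A11}; // the lower-triangular block may be left NULL for Cholesky
      PetscCall(MatCreateNest(PETSC_COMM_WORLD, 2, NULL, 2, NULL, blocks, &A));
  */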
  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag, isDense;

      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
      if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
      else {
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
      }
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
            PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
          PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
        PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym               = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
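/*
  In the Cholesky branch above, the MUMPS symmetry flag follows the matrix state: sym = 1
  (symmetric positive definite) is used only for real matrices marked SPD, otherwise sym = 2
  (general symmetric); in complex arithmetic sym = 2 is always used. A caller who knows the
  operator is SPD can expose that before requesting the factor, e.g. (A and F are placeholders):

    PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE));
    PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_CHOLESKY, &F));
*/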
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
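/*
  A minimal usage sketch of selecting MUMPS at the KSP/PC level once the registrations above have
  run, assuming an assembled Mat A and Vecs b, x. The same effect is obtained on the command line
  with
    -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_24 1

    KSP ksp;
    PC  pc;
    Mat F;

    PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
    PetscCall(KSPSetOperators(ksp, A, A));
    PetscCall(KSPSetType(ksp, KSPPREONLY));
    PetscCall(KSPGetPC(ksp, &pc));
    PetscCall(PCSetType(pc, PCLU));
    PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
    PetscCall(PCFactorSetUpMatSolverType(pc)); // creates the factor matrix so MUMPS options can be set on it
    PetscCall(PCFactorGetMatrix(pc, &F));
    PetscCall(MatMumpsSetIcntl(F, 24, 1)); // e.g. ask MUMPS to detect null pivots
    PetscCall(KSPSolve(ksp, b, x));
*/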