/*
   Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

#define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <cmumps_c.h>
  #else
    #include <zmumps_c.h>
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <smumps_c.h>
  #else
    #include <dmumps_c.h>
  #endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c cmumps_c
  #else
    #define MUMPS_c zmumps_c
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c smumps_c
  #else
    #define MUMPS_c dmumps_c
  #endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
   numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in the MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is a row/col index or some other small integer */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(PETSC_SUCCESS);
}
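
/* Illustrative usage (not a routine of this file): every index handed to MUMPS goes through the cast above, e.g.
     PetscMUMPSInt row;
     PetscCall(PetscMUMPSIntCast(i + shift, &row)); // shift = 1 when MUMPS expects Fortran-style 1-based indices
   In 64-bit-index builds the cast is range-checked; otherwise it reduces to a plain assignment. */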

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;
  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

/* If using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
           to each process, so we only Bcast info[1], an error code, and leave the others (since they do not have \
           an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
           omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
        */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MumpsScalar mumps_complex
  #else
    #define MumpsScalar mumps_double_complex
  #endif
#else
  #define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]

typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
  #else
  ZMUMPS_STRUC_C id;
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
  #else
  DMUMPS_STRUC_C id;
  #endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps */
  PetscScalar   *val, *val_alloc; /* For some matrices we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc */
  PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in MUMPS */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from the previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen, cur_jlen;  /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscInt64  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller under which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is the PETSc matrix's comm */
  PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm? */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] are the column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

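    /* B aliases mumps->id.redrhs, so copying the product back into B stores the Schur solution where the
       MUMPS expansion phase (ICNTL(26) = 2) expects to find it */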
    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
  } else { /* prepare for the expansion step */
    /* solve the Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX: only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/
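
/* Illustrative example (not used by the code): for the 2x2 AIJ matrix
     [ 10  0 ]
     [  3  4 ]
   a conversion with shift = 1 (Fortran-style indexing for MUMPS) produces
     r = {1, 2, 2}, c = {1, 1, 2}, v = {10, 3, 4}, nnz = 3
   while shift = 0 would yield the 0-based triples r = {0, 1, 1}, c = {0, 0, 1}. */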

static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscInt64     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices];
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
  else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscInt64      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatGetBlockSize(A, &bs));
    M  = A->rmap->N / bs;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = aa->a;
  } else if (mumps->nest_vals)
    PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscInt64      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool isset, hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscInt64 alloc_size = aa->nz * bs2;

    PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
    if (bs > 1) {
      PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs > 1) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (ajj[j] > i || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
              }
              val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
  } else if (mumps->nest_vals)
    PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j;
  const PetscScalar *av, *v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
  PetscBool          missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i + 1] - adiag[i];
        }
      }
    } else {
      for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) val[nz++] = v1[j];
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) val[nz++] = v1[j];
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
  PetscInt           bs;
  PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
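  /* For MATMPISBAIJ, the diagonal block mat->A is SeqSBAIJ (upper triangle only) and the off-diagonal block
     mat->B is SeqBAIJ; garray[] maps the local column indices of mat->B to global column indices */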
  PetscCall(MatGetBlockSize(A, &bs));
  rstart = A->rmap->rstart;
  ai     = aa->i;
  aj     = aa->j;
  bi     = bb->i;
  bj     = bb->j;
  av     = aa->a;
  bv     = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    /* cannot determine the exact mumps->nnz yet because of the SBAIJ storage */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i] * bs2;
    v2     = bv + bi[i] * bs2;

    if (bs > 1) {
      /* A-part */
      for (j = 0; j < countA; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (rstart + ajj[j] * bs > irow || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs];
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
            }
            val[jj++] = v2[j * bs2 + m + k * bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j = 0; j < countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscInt64         rstart, cstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)(Ad)->data;
  bb = (Mat_SeqAIJ *)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;
  cstart = A->cmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa    = (Mat_SeqAIJ *)(Ad)->data;
  bb    = (Mat_SeqAIJ *)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i = 0; i < m; i++) {
      nza += (ai[i + 1] - adiag[i]);
      countB = bi[i + 1] - bi[i];
      bjj    = bj + bi[i];
      for (j = 0; j < countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of the upper triangular part of mat */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i + 1] - adiag[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt     M = A->rmap->n;
  PetscInt64         i;
  PetscMUMPSInt     *row, *col;
  Vec                v;

  PetscFunctionBegin;
  PetscCall(MatDiagonalGetDiagonal(A, &v));
  PetscCall(VecGetArrayRead(v, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(M, &row, M, &col));
    for (i = 0; i < M; i++) {
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
      col[i] = row[i];
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = M;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(VecRestoreArrayRead(v, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscScalar   *v;
  const PetscInt m = A->rmap->n, N = A->cmap->N;
  PetscInt       lda;
  PetscInt64     i, j;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(A, &lda));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
    for (i = 0; i < m; i++) {
      col[i] = 0;
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
    }
    for (j = 1; j < N; j++) {
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
      PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
    }
    if (lda == m) mumps->val = v;
    else {
      PetscCall(PetscMalloc1(m * N, &mumps->val));
      mumps->val_alloc = mumps->val;
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = m * N;
  } else {
    if (lda == m && !mumps->nest_vals) mumps->val = v;
    else {
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscInt64     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular blocks for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag, isDense;
          MatInfo   info;

          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
          if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
          else {
            PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
            if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
          }
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
          PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

          if (chol) {
            if (r == c) {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            } else {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub  = mats[r][c];
        const PetscInt *ridx = rows_idx[r];
        const PetscInt *cidx = cols_idx[c];
        PetscInt        rst;
        PetscSF         csf;
        PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
        PetscLayout     cmap;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        swap = (PetscBool)(isTrans || isHTrans);

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
        if (isHTrans) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]);
        }

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool isTrans, isHTrans = PETSC_FALSE;
        Mat      sub  = mats[r][c];
        PetscInt midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (isHTrans) {
          PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
        }
      }
    }
    mumps->val = oval;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to the start of the RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar *)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when the current nrhs is bigger than any we have seen so far */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
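        /* rhs_loc[] holds the (possibly re-arranged) right-hand sides this OMP master hands to MUMPS, while
           rhs_recvbuf[] is the raw MPI_Gatherv buffer; for a single RHS the two can alias (see recvbuf below) */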
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. A single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
                                   &cite1));
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1445 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1446 &cite2)); 1447 1448 if (A->factorerrortype) { 1449 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1450 PetscCall(VecSetInf(x)); 1451 PetscFunctionReturn(PETSC_SUCCESS); 1452 } 1453 1454 mumps->id.nrhs = 1; 1455 if (mumps->petsc_size > 1) { 1456 if (mumps->ICNTL20 == 10) { 1457 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1458 PetscCall(VecGetArrayRead(b, &rarray)); 1459 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1460 } else { 1461 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1462 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1463 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1464 if (!mumps->myid) { 1465 PetscCall(VecGetArray(mumps->b_seq, &array)); 1466 mumps->id.rhs = (MumpsScalar *)array; 1467 } 1468 } 1469 } else { /* petsc_size == 1 */ 1470 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1471 PetscCall(VecCopy(b, x)); 1472 PetscCall(VecGetArray(x, &array)); 1473 mumps->id.rhs = (MumpsScalar *)array; 1474 } 1475 1476 /* 1477 handle condensation step of Schur complement (if any) 1478 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1479 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1480 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1481 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1482 */ 1483 if (mumps->id.size_schur > 0) { 1484 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1485 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1486 second_solve = PETSC_TRUE; 1487 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1488 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1489 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1490 } 1491 /* solve phase */ 1492 mumps->id.job = JOB_SOLVE; 1493 PetscMUMPS_c(mumps); 1494 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1495 1496 /* handle expansion step of Schur complement (if any) */ 1497 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1498 else if (mumps->id.ICNTL(26) == 1) { 1499 PetscCall(MatMumpsSolveSchur_Private(A)); 1500 for (i = 0; i < mumps->id.size_schur; ++i) { 1501 #if !defined(PETSC_USE_COMPLEX) 1502 PetscScalar val = mumps->id.redrhs[i]; 1503 #else 1504 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1505 #endif 1506 array[mumps->id.listvar_schur[i] - 1] = val; 1507 } 1508 } 1509 1510 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1511 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1512 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1513 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1514 } 1515 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1516 PetscInt *isol2_loc 
= NULL; 1517 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1518 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1519 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1520 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1521 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1522 PetscCall(ISDestroy(&is_iden)); 1523 PetscCall(ISDestroy(&is_petsc)); 1524 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1525 } 1526 1527 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1528 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1529 } 1530 1531 if (mumps->petsc_size > 1) { 1532 if (mumps->ICNTL20 == 10) { 1533 PetscCall(VecRestoreArrayRead(b, &rarray)); 1534 } else if (!mumps->myid) { 1535 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1536 } 1537 } else PetscCall(VecRestoreArray(x, &array)); 1538 1539 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1540 PetscFunctionReturn(PETSC_SUCCESS); 1541 } 1542 1543 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1544 { 1545 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1546 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1547 1548 PetscFunctionBegin; 1549 mumps->id.ICNTL(9) = 0; 1550 PetscCall(MatSolve_MUMPS(A, b, x)); 1551 mumps->id.ICNTL(9) = value; 1552 PetscFunctionReturn(PETSC_SUCCESS); 1553 } 1554 1555 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1556 { 1557 Mat Bt = NULL; 1558 PetscBool denseX, denseB, flg, flgT; 1559 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1560 PetscInt i, nrhs, M; 1561 PetscScalar *array; 1562 const PetscScalar *rbray; 1563 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1564 PetscMUMPSInt *isol_loc, *isol_loc_save; 1565 PetscScalar *bray, *sol_loc, *sol_loc_save; 1566 IS is_to, is_from; 1567 PetscInt k, proc, j, m, myrstart; 1568 const PetscInt *rstart; 1569 Vec v_mpi, msol_loc; 1570 VecScatter scat_sol; 1571 Vec b_seq; 1572 VecScatter scat_rhs; 1573 PetscScalar *aa; 1574 PetscInt spnr, *ia, *ja; 1575 Mat_MPIAIJ *b = NULL; 1576 1577 PetscFunctionBegin; 1578 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1579 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1580 1581 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1582 if (denseB) { 1583 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1584 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1585 } else { /* sparse B */ 1586 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1587 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1588 if (flgT) { /* input B is transpose of actual RHS matrix, 1589 because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 1590 PetscCall(MatTransposeGetMat(B, &Bt)); 1591 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1592 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1593 } 1594 1595 PetscCall(MatGetSize(B, &M, &nrhs)); 1596 mumps->id.nrhs = nrhs; 1597 mumps->id.lrhs = M; 1598 mumps->id.rhs = NULL; 1599 1600 if (mumps->petsc_size == 1) { 1601 PetscScalar *aa; 1602 PetscInt spnr, *ia, *ja; 1603 PetscBool second_solve = PETSC_FALSE; 1604 1605 PetscCall(MatDenseGetArray(X, &array)); 1606 mumps->id.rhs = (MumpsScalar *)array; 1607 1608 if (denseB) { 1609 /* copy B to X */ 1610 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1611 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1612 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1613 } else { /* sparse B */ 1614 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1615 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1616 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1617 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1618 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1619 } 1620 /* handle condensation step of Schur complement (if any) */ 1621 if (mumps->id.size_schur > 0) { 1622 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1623 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1624 second_solve = PETSC_TRUE; 1625 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1626 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1627 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1628 } 1629 /* solve phase */ 1630 mumps->id.job = JOB_SOLVE; 1631 PetscMUMPS_c(mumps); 1632 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1633 1634 /* handle expansion step of Schur complement (if any) */ 1635 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1636 else if (mumps->id.ICNTL(26) == 1) { 1637 PetscCall(MatMumpsSolveSchur_Private(A)); 1638 for (j = 0; j < nrhs; ++j) 1639 for (i = 0; i < mumps->id.size_schur; ++i) { 1640 #if !defined(PETSC_USE_COMPLEX) 1641 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1642 #else 1643 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1644 #endif 1645 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1646 } 1647 } 1648 if (!denseB) { /* sparse B */ 1649 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1650 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1651 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1652 } 1653 PetscCall(MatDenseRestoreArray(X, &array)); 1654 PetscFunctionReturn(PETSC_SUCCESS); 1655 } 1656 1657 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
*/ 1658 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1659 1660 /* create msol_loc to hold mumps local solution */ 1661 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1662 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1663 1664 lsol_loc = mumps->id.lsol_loc; 1665 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1666 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1667 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1668 mumps->id.isol_loc = isol_loc; 1669 1670 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1671 1672 if (denseB) { 1673 if (mumps->ICNTL20 == 10) { 1674 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1675 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1676 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1677 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1678 PetscCall(MatGetLocalSize(B, &m, NULL)); 1679 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1680 } else { 1681 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1682 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1683 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1684 0, re-arrange B into desired order, which is a local operation. 1685 */ 1686 1687 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1688 /* wrap dense rhs matrix B into a vector v_mpi */ 1689 PetscCall(MatGetLocalSize(B, &m, NULL)); 1690 PetscCall(MatDenseGetArray(B, &bray)); 1691 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1692 PetscCall(MatDenseRestoreArray(B, &bray)); 1693 1694 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
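     The idx[] array built on rank 0 below maps the k-th entry of v_mpi (which enumerates, rank block by rank block,
     the columns of each process's local piece of B) to position j*M + i of b_seq, so that the centralized right-hand
     side ends up stored column by column, as MUMPS expects.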
*/ 1695 if (!mumps->myid) { 1696 PetscInt *idx; 1697 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1698 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1699 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1700 k = 0; 1701 for (proc = 0; proc < mumps->petsc_size; proc++) { 1702 for (j = 0; j < nrhs; j++) { 1703 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1704 } 1705 } 1706 1707 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1708 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1709 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1710 } else { 1711 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1712 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1713 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1714 } 1715 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1716 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1717 PetscCall(ISDestroy(&is_to)); 1718 PetscCall(ISDestroy(&is_from)); 1719 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1720 1721 if (!mumps->myid) { /* define rhs on the host */ 1722 PetscCall(VecGetArray(b_seq, &bray)); 1723 mumps->id.rhs = (MumpsScalar *)bray; 1724 PetscCall(VecRestoreArray(b_seq, &bray)); 1725 } 1726 } 1727 } else { /* sparse B */ 1728 b = (Mat_MPIAIJ *)Bt->data; 1729 1730 /* wrap dense X into a vector v_mpi */ 1731 PetscCall(MatGetLocalSize(X, &m, NULL)); 1732 PetscCall(MatDenseGetArray(X, &bray)); 1733 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1734 PetscCall(MatDenseRestoreArray(X, &bray)); 1735 1736 if (!mumps->myid) { 1737 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1738 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1739 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1740 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1741 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1742 } else { 1743 mumps->id.irhs_ptr = NULL; 1744 mumps->id.irhs_sparse = NULL; 1745 mumps->id.nz_rhs = 0; 1746 mumps->id.rhs_sparse = NULL; 1747 } 1748 } 1749 1750 /* solve phase */ 1751 mumps->id.job = JOB_SOLVE; 1752 PetscMUMPS_c(mumps); 1753 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1754 1755 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1756 PetscCall(MatDenseGetArray(X, &array)); 1757 PetscCall(VecPlaceArray(v_mpi, array)); 1758 1759 /* create scatter scat_sol */ 1760 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1761 /* iidx: index for scatter mumps solution to petsc X */ 1762 1763 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1764 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1765 for (i = 0; i < lsol_loc; i++) { 1766 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1767 1768 for (proc = 0; proc < mumps->petsc_size; proc++) { 1769 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1770 myrstart = rstart[proc]; 1771 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1772 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1773 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1774 break; 1775 } 1776 } 1777 1778 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1779 } 1780 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1781 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1782 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1783 PetscCall(ISDestroy(&is_from)); 1784 PetscCall(ISDestroy(&is_to)); 1785 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1786 PetscCall(MatDenseRestoreArray(X, &array)); 1787 1788 /* free spaces */ 1789 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1790 mumps->id.isol_loc = isol_loc_save; 1791 1792 PetscCall(PetscFree2(sol_loc, isol_loc)); 1793 PetscCall(PetscFree(idxx)); 1794 PetscCall(VecDestroy(&msol_loc)); 1795 PetscCall(VecDestroy(&v_mpi)); 1796 if (!denseB) { 1797 if (!mumps->myid) { 1798 b = (Mat_MPIAIJ *)Bt->data; 1799 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1800 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1801 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1802 } 1803 } else { 1804 if (mumps->ICNTL20 == 0) { 1805 PetscCall(VecDestroy(&b_seq)); 1806 PetscCall(VecScatterDestroy(&scat_rhs)); 1807 } 1808 } 1809 PetscCall(VecScatterDestroy(&scat_sol)); 1810 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1811 PetscFunctionReturn(PETSC_SUCCESS); 1812 } 1813 1814 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1815 { 1816 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1817 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1818 1819 PetscFunctionBegin; 1820 mumps->id.ICNTL(9) = 0; 1821 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1822 mumps->id.ICNTL(9) = value; 1823 PetscFunctionReturn(PETSC_SUCCESS); 1824 } 1825 1826 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1827 { 1828 PetscBool flg; 1829 Mat B; 1830 1831 PetscFunctionBegin; 1832 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1833 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1834 1835 /* Create B=Bt^T that uses Bt's data structure */ 1836 PetscCall(MatCreateTranspose(Bt, &B)); 1837 1838 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1839 PetscCall(MatDestroy(&B)); 1840 PetscFunctionReturn(PETSC_SUCCESS); 1841 } 1842 1843 #if !defined(PETSC_USE_COMPLEX) 1844 /* 1845 input: 1846 F: numeric factor 1847 output: 1848 nneg: total number of negative pivots 1849 nzero: total number of zero pivots 1850 npos: (global dimension of F) - nneg - nzero 1851 */ 1852 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1853 { 1854 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1855 PetscMPIInt size; 1856 1857 PetscFunctionBegin; 1858 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1859 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1860 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1861 1862 if (nneg) *nneg = mumps->id.INFOG(12); 1863 if (nzero || npos) { 1864 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1865 if (nzero) *nzero = mumps->id.INFOG(28); 1866 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1867 } 1868 PetscFunctionReturn(PETSC_SUCCESS); 1869 } 1870 #endif 1871 1872 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1873 { 1874 PetscInt i, nreqs; 1875 PetscMUMPSInt *irn, *jcn; 1876 PetscMPIInt count; 1877 PetscInt64 totnnz, remain; 1878 const PetscInt osize = mumps->omp_comm_size; 1879 PetscScalar *val; 1880 1881 PetscFunctionBegin; 1882 if (osize > 1) { 1883 if (reuse == MAT_INITIAL_MATRIX) { 1884 /* master first gathers counts of nonzeros to receive */ 1885 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1886 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1887 1888 /* Then each computes number of send/recvs */ 1889 if (mumps->is_omp_master) { 1890 /* Start from 1 since self communication is not done in MPI */ 1891 nreqs = 0; 1892 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1893 } else { 1894 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1895 } 1896 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1897 1898 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1899 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1900 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1901 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
1902 */ 1903 nreqs = 0; /* counter for actual send/recvs */ 1904 if (mumps->is_omp_master) { 1905 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1906 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1907 PetscCall(PetscMalloc1(totnnz, &val)); 1908 1909 /* Self communication */ 1910 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1911 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1912 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1913 1914 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1915 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1916 PetscCall(PetscFree(mumps->val_alloc)); 1917 mumps->nnz = totnnz; 1918 mumps->irn = irn; 1919 mumps->jcn = jcn; 1920 mumps->val = mumps->val_alloc = val; 1921 1922 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1923 jcn += mumps->recvcount[0]; 1924 val += mumps->recvcount[0]; 1925 1926 /* Remote communication */ 1927 for (i = 1; i < osize; i++) { 1928 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1929 remain = mumps->recvcount[i] - count; 1930 while (count > 0) { 1931 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1932 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1933 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1934 irn += count; 1935 jcn += count; 1936 val += count; 1937 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1938 remain -= count; 1939 } 1940 } 1941 } else { 1942 irn = mumps->irn; 1943 jcn = mumps->jcn; 1944 val = mumps->val; 1945 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1946 remain = mumps->nnz - count; 1947 while (count > 0) { 1948 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1949 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1950 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1951 irn += count; 1952 jcn += count; 1953 val += count; 1954 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1955 remain -= count; 1956 } 1957 } 1958 } else { 1959 nreqs = 0; 1960 if (mumps->is_omp_master) { 1961 val = mumps->val + mumps->recvcount[0]; 1962 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1963 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1964 remain = mumps->recvcount[i] - count; 1965 while (count > 0) { 1966 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1967 val += count; 1968 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1969 remain -= count; 1970 } 1971 } 1972 } else { 1973 val = mumps->val; 1974 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1975 remain = mumps->nnz - count; 1976 while (count > 0) { 1977 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1978 val += count; 1979 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1980 remain -= count; 1981 } 1982 } 1983 } 1984 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1985 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1986 } 1987 PetscFunctionReturn(PETSC_SUCCESS); 1988 } 1989 1990 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const 
MatFactorInfo *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
  PetscBool isMPIAIJ;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
    if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));

  /* numerical factorization phase */
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
  } else {
    mumps->id.a_loc = (MumpsScalar *)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
    if (mumps->id.INFOG(1) == -10) {
      PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
    } else if (mumps->id.INFOG(1) == -13) {
      PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
    } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
      PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
    } else {
      PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OTHER;
    }
  }
  PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));

  F->assembled = PETSC_TRUE;

  if (F->schur) { /* reset Schur status to unfactored */
#if defined(PETSC_HAVE_CUDA)
    F->schur->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
    }
    PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
  }

  /* make sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl() is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt lsol_loc;
    PetscScalar *sol_loc;

    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));

    /* distributed solution; create x_seq=sol_loc for repeated use */
    if (mumps->x_seq) {
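      /* free the workspace created by a previous numeric factorization, since lsol_loc = INFO(23) may have changed */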
2056 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2057 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2058 PetscCall(VecDestroy(&mumps->x_seq)); 2059 } 2060 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2061 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2062 mumps->id.lsol_loc = lsol_loc; 2063 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2064 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2065 } 2066 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 2067 PetscFunctionReturn(PETSC_SUCCESS); 2068 } 2069 2070 /* Sets MUMPS options from the options database */ 2071 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2072 { 2073 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2074 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2075 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2076 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2077 MumpsScalar *arr; 2078 2079 PetscFunctionBegin; 2080 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2081 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2082 PetscInt nthreads = 0; 2083 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2084 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2085 2086 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2087 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2088 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2089 2090 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2091 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2092 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2093 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2094 if (mumps->use_petsc_omp_support) { 2095 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2096 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2097 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2098 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2099 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2100 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2101 #endif 2102 } else { 2103 mumps->omp_comm = PETSC_COMM_SELF; 2104 mumps->mumps_comm = mumps->petsc_comm; 2105 mumps->is_omp_master = PETSC_TRUE; 2106 } 2107 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2108 mumps->reqs = NULL; 2109 mumps->tag = 0; 2110 2111 if (mumps->mumps_comm != MPI_COMM_NULL) { 2112 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2113 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
      */
        MPI_Comm comm;
        PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
        mumps->mumps_comm = comm;
      } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
    }

    mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
    mumps->id.job = JOB_INIT;
    mumps->id.par = 1; /* host participates in factorization and solve */
    mumps->id.sym = mumps->sym;

    size = mumps->id.size_schur;
    arr = mumps->id.schur;
    listvar_schur = mumps->id.listvar_schur;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    /* restore cached ICNTL and CNTL values */
    for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
    for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
    PetscCall(PetscFree(mumps->ICNTL_pre));
    PetscCall(PetscFree(mumps->CNTL_pre));

    if (schur) {
      mumps->id.size_schur = size;
      mumps->id.schur_lld = size;
      mumps->id.schur = arr;
      mumps->id.listvar_schur = listvar_schur;
      if (mumps->petsc_size > 1) {
        PetscBool gs; /* gs is false if any rank other than root has a non-empty IS */

        mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
        gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
        PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
        PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
      } else {
        if (F->factortype == MAT_FACTOR_LU) {
          mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
        } else {
          mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
        }
      }
      mumps->id.ICNTL(26) = -1;
    }

    /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
       For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2160 */ 2161 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2162 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2163 2164 mumps->scat_rhs = NULL; 2165 mumps->scat_sol = NULL; 2166 2167 /* set PETSc-MUMPS default options - override MUMPS default */ 2168 mumps->id.ICNTL(3) = 0; 2169 mumps->id.ICNTL(4) = 0; 2170 if (mumps->petsc_size == 1) { 2171 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2172 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2173 } else { 2174 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2175 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2176 } 2177 } 2178 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2179 if (flg) mumps->id.ICNTL(1) = icntl; 2180 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2181 if (flg) mumps->id.ICNTL(2) = icntl; 2182 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2183 if (flg) mumps->id.ICNTL(3) = icntl; 2184 2185 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2186 if (flg) mumps->id.ICNTL(4) = icntl; 2187 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2188 2189 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2190 if (flg) mumps->id.ICNTL(6) = icntl; 2191 2192 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2193 if (flg) { 2194 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2195 mumps->id.ICNTL(7) = icntl; 2196 } 2197 2198 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2199 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2200 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2201 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2202 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2203 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2204 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2205 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2206 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2207 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2208 if (flg) { 2209 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2210 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2211 } 2212 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2213 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2214 PetscCall(MatDestroy(&F->schur)); 2215 PetscCall(MatMumpsResetSchur_Private(mumps)); 2216 } 2217 2218 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2219 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2220 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2221 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2222 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 
     In short, with MPICH we could not use distributed RHS until MPICH v4.0b1, unless the workaround available in MUMPS 5.6.2+ is enabled.
  */
#if PETSC_PKG_MUMPS_VERSION_GE(5, 6, 2) && defined(PETSC_HAVE_MUMPS_AVOID_MPI_IN_PLACE)
  mumps->ICNTL20 = 10;
#elif PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
  mumps->ICNTL20 = 0; /* Centralized dense RHS */
#else
  mumps->ICNTL20 = 10; /* Distributed dense RHS */
#endif
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
  PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
#if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
  PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
#endif
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
  if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix is present", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the
right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2252 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2253 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2254 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2255 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2256 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2257 2258 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2259 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2260 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2261 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2262 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2263 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2264 2265 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2266 2267 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2268 if (ninfo) { 2269 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2270 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2271 mumps->ninfo = ninfo; 2272 for (i = 0; i < ninfo; i++) { 2273 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2274 mumps->info[i] = info[i]; 2275 } 2276 } 2277 PetscOptionsEnd(); 2278 PetscFunctionReturn(PETSC_SUCCESS); 2279 } 2280 2281 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2282 { 2283 PetscFunctionBegin; 2284 if (mumps->id.INFOG(1) < 0) { 2285 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2286 if (mumps->id.INFOG(1) == -6) { 2287 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2288 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2289 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2290 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2291 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 
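      /* MUMPS could not allocate its internal work arrays during analysis (INFOG(1) = -5 or -7); according to the MUMPS
         users manual, INFO(2) reports the size of the failed allocation */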
    } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
      PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n"));
    } else {
      PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OTHER;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
  Vec b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n = M;
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
      if (r) {
        mumps->id.ICNTL(7) = 1;
        if (!mumps->myid) {
          const PetscInt *idx;
          PetscInt i;

          PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
          PetscCall(ISGetIndices(r, &idx));
          for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
          PetscCall(ISRestoreIndices(r, &idx));
        }
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
    if (mumps->ICNTL20 == 0) { /* Centralized rhs.
Create scatter scat_rhs for repeated use in MatSolve() */ 2350 PetscCall(MatCreateVecs(A, NULL, &b)); 2351 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2352 PetscCall(VecDestroy(&b)); 2353 } 2354 break; 2355 } 2356 PetscMUMPS_c(mumps); 2357 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2358 2359 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2360 F->ops->solve = MatSolve_MUMPS; 2361 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2362 F->ops->matsolve = MatMatSolve_MUMPS; 2363 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2364 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2365 2366 mumps->matstruc = SAME_NONZERO_PATTERN; 2367 PetscFunctionReturn(PETSC_SUCCESS); 2368 } 2369 2370 /* Note the Petsc r and c permutations are ignored */ 2371 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2372 { 2373 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2374 Vec b; 2375 const PetscInt M = A->rmap->N; 2376 2377 PetscFunctionBegin; 2378 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2379 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2380 PetscFunctionReturn(PETSC_SUCCESS); 2381 } 2382 2383 /* Set MUMPS options from the options database */ 2384 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2385 2386 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2387 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2388 2389 /* analysis phase */ 2390 mumps->id.job = JOB_FACTSYMBOLIC; 2391 mumps->id.n = M; 2392 switch (mumps->id.ICNTL(18)) { 2393 case 0: /* centralized assembled matrix input */ 2394 if (!mumps->myid) { 2395 mumps->id.nnz = mumps->nnz; 2396 mumps->id.irn = mumps->irn; 2397 mumps->id.jcn = mumps->jcn; 2398 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2399 } 2400 break; 2401 case 3: /* distributed assembled matrix input (size>1) */ 2402 mumps->id.nnz_loc = mumps->nnz; 2403 mumps->id.irn_loc = mumps->irn; 2404 mumps->id.jcn_loc = mumps->jcn; 2405 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2406 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2407 PetscCall(MatCreateVecs(A, NULL, &b)); 2408 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2409 PetscCall(VecDestroy(&b)); 2410 } 2411 break; 2412 } 2413 PetscMUMPS_c(mumps); 2414 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2415 2416 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2417 F->ops->solve = MatSolve_MUMPS; 2418 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2419 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2420 2421 mumps->matstruc = SAME_NONZERO_PATTERN; 2422 PetscFunctionReturn(PETSC_SUCCESS); 2423 } 2424 2425 /* Note the Petsc r permutation and factor info are ignored */ 2426 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2427 { 2428 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2429 Vec b; 2430 const PetscInt M = A->rmap->N; 2431 2432 PetscFunctionBegin; 2433 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2434 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2435 PetscFunctionReturn(PETSC_SUCCESS); 2436 } 2437 2438 /* Set MUMPS options from the options database */ 2439 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2440 2441 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2442 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2443 2444 /* analysis phase */ 2445 mumps->id.job = JOB_FACTSYMBOLIC; 2446 mumps->id.n = M; 2447 switch (mumps->id.ICNTL(18)) { 2448 case 0: /* centralized assembled matrix input */ 2449 if (!mumps->myid) { 2450 mumps->id.nnz = mumps->nnz; 2451 mumps->id.irn = mumps->irn; 2452 mumps->id.jcn = mumps->jcn; 2453 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2454 } 2455 break; 2456 case 3: /* distributed assembled matrix input (size>1) */ 2457 mumps->id.nnz_loc = mumps->nnz; 2458 mumps->id.irn_loc = mumps->irn; 2459 mumps->id.jcn_loc = mumps->jcn; 2460 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2461 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2462 PetscCall(MatCreateVecs(A, NULL, &b)); 2463 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2464 PetscCall(VecDestroy(&b)); 2465 } 2466 break; 2467 } 2468 PetscMUMPS_c(mumps); 2469 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2470 2471 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2472 F->ops->solve = MatSolve_MUMPS; 2473 F->ops->solvetranspose = MatSolve_MUMPS; 2474 F->ops->matsolve = MatMatSolve_MUMPS; 2475 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2476 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2477 #if defined(PETSC_USE_COMPLEX) 2478 F->ops->getinertia = NULL; 2479 #else 2480 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2481 #endif 2482 2483 mumps->matstruc = SAME_NONZERO_PATTERN; 2484 PetscFunctionReturn(PETSC_SUCCESS); 2485 } 2486 2487 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2488 { 2489 PetscBool iascii; 2490 PetscViewerFormat format; 2491 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2492 2493 PetscFunctionBegin; 2494 /* check if matrix is mumps type */ 2495 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2496 2497 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2498 if (iascii) { 2499 PetscCall(PetscViewerGetFormat(viewer, &format)); 2500 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2501 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2502 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2503 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2504 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2505 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2506 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2507 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2508 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2509 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2510 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2511 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2512 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2513 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2514 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2515 if (mumps->id.ICNTL(11) > 0) { 2516 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2517 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2518 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2519 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2520 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2521 
PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2522 } 2523 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2524 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2525 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2526 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2527 /* ICNTL(15-17) not used */ 2528 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2529 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2530 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2531 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2532 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2533 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2534 2535 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2536 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2537 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2538 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2539 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2540 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2541 2542 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2543 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2544 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2545 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2546 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2547 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2548 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2549 2550 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2551 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2552 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2553 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2554 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", 
mumps->id.CNTL(5))); 2555 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2556 2557 /* information local to each processor */ 2558 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2559 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2560 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2561 PetscCall(PetscViewerFlush(viewer)); 2562 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2563 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2564 PetscCall(PetscViewerFlush(viewer)); 2565 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2566 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2567 PetscCall(PetscViewerFlush(viewer)); 2568 2569 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2570 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2571 PetscCall(PetscViewerFlush(viewer)); 2572 2573 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2574 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2575 PetscCall(PetscViewerFlush(viewer)); 2576 2577 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2578 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2579 PetscCall(PetscViewerFlush(viewer)); 2580 2581 if (mumps->ninfo && mumps->ninfo <= 80) { 2582 PetscInt i; 2583 for (i = 0; i < mumps->ninfo; i++) { 2584 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2585 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2586 PetscCall(PetscViewerFlush(viewer)); 2587 } 2588 } 2589 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2590 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2591 2592 if (mumps->myid == 0) { /* information from the host */ 2593 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2594 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2595 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2596 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2597 2598 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2599 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2600 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2601 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2602 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2603 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2604 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2605 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2606 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2607 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2608 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2609 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2610 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2611 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2612 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2613 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2614 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2615 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 
2616 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2617 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2618 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2619 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2620 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2621 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2622 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2623 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2624 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2625 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2626 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2627 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2628 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2629 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2630 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2631 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2632 } 2633 } 2634 } 2635 PetscFunctionReturn(PETSC_SUCCESS); 2636 } 2637 2638 static PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2639 { 2640 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2641 2642 PetscFunctionBegin; 2643 info->block_size = 1.0; 2644 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2645 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  mumps->id.schur      = (MumpsScalar *)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size;
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

  /* MUMPS expects Fortran style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always returns a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[j * N + i] = val;
        }
      }
    } else { /* stored by columns */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    }
  } else {                          /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = array[j * N + i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = array[j * N + i] = val;
        }
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) {                             /* not already cached */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ?
mumps->ICNTL_pre[0] : 0; 2776 *ival = 0; 2777 for (i = 0; i < nICNTL_pre; ++i) { 2778 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2779 } 2780 } else *ival = mumps->id.ICNTL(icntl); 2781 PetscFunctionReturn(PETSC_SUCCESS); 2782 } 2783 2784 /*@ 2785 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2786 2787 Logically Collective 2788 2789 Input Parameters: 2790 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2791 . icntl - index of MUMPS parameter array ICNTL() 2792 - ival - value of MUMPS ICNTL(icntl) 2793 2794 Options Database Key: 2795 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2796 2797 Level: beginner 2798 2799 References: 2800 . * - MUMPS Users' Guide 2801 2802 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2803 @*/ 2804 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2805 { 2806 PetscFunctionBegin; 2807 PetscValidType(F, 1); 2808 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2809 PetscValidLogicalCollectiveInt(F, icntl, 2); 2810 PetscValidLogicalCollectiveInt(F, ival, 3); 2811 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2812 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2813 PetscFunctionReturn(PETSC_SUCCESS); 2814 } 2815 2816 /*@ 2817 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2818 2819 Logically Collective 2820 2821 Input Parameters: 2822 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2823 - icntl - index of MUMPS parameter array ICNTL() 2824 2825 Output Parameter: 2826 . ival - value of MUMPS ICNTL(icntl) 2827 2828 Level: beginner 2829 2830 References: 2831 . * - MUMPS Users' Guide 2832 2833 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2834 @*/ 2835 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2836 { 2837 PetscFunctionBegin; 2838 PetscValidType(F, 1); 2839 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2840 PetscValidLogicalCollectiveInt(F, icntl, 2); 2841 PetscAssertPointer(ival, 3); 2842 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2843 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2844 PetscFunctionReturn(PETSC_SUCCESS); 2845 } 2846 2847 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2848 { 2849 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2850 2851 PetscFunctionBegin; 2852 if (mumps->id.job == JOB_NULL) { 2853 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 2854 for (i = 0; i < nCNTL_pre; ++i) 2855 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2856 if (i == nCNTL_pre) { 2857 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2858 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2859 mumps->CNTL_pre[0]++; 2860 } 2861 mumps->CNTL_pre[1 + 2 * i] = icntl; 2862 mumps->CNTL_pre[2 + 2 * i] = val; 2863 } else mumps->id.CNTL(icntl) = val; 2864 PetscFunctionReturn(PETSC_SUCCESS); 2865 } 2866 2867 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2868 { 2869 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2870 2871 PetscFunctionBegin; 2872 if (mumps->id.job == JOB_NULL) { 2873 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2874 *val = 0.0; 2875 for (i = 0; i < nCNTL_pre; ++i) { 2876 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2877 } 2878 } else *val = mumps->id.CNTL(icntl); 2879 PetscFunctionReturn(PETSC_SUCCESS); 2880 } 2881 2882 /*@ 2883 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2884 2885 Logically Collective 2886 2887 Input Parameters: 2888 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2889 . icntl - index of MUMPS parameter array CNTL() 2890 - val - value of MUMPS CNTL(icntl) 2891 2892 Options Database Key: 2893 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2894 2895 Level: beginner 2896 2897 References: 2898 . * - MUMPS Users' Guide 2899 2900 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2901 @*/ 2902 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2903 { 2904 PetscFunctionBegin; 2905 PetscValidType(F, 1); 2906 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2907 PetscValidLogicalCollectiveInt(F, icntl, 2); 2908 PetscValidLogicalCollectiveReal(F, val, 3); 2909 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2910 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2911 PetscFunctionReturn(PETSC_SUCCESS); 2912 } 2913 2914 /*@ 2915 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2916 2917 Logically Collective 2918 2919 Input Parameters: 2920 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2921 - icntl - index of MUMPS parameter array CNTL() 2922 2923 Output Parameter: 2924 . val - value of MUMPS CNTL(icntl) 2925 2926 Level: beginner 2927 2928 References: 2929 . 
* - MUMPS Users' Guide

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfo = mumps->id.RINFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfog = mumps->id.RINFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
  *size  = 0;
  *array = NULL;
  if (!mumps->myid) {
    *size = mumps->id.INFOG(28);
    PetscCall(PetscMalloc1(*size, array));
    for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
{
  Mat          Bt = NULL, Btseq = NULL;
  PetscBool    flg;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *aa;
  PetscInt     spnr, *ia, *ja, M, nrhs;

  PetscFunctionBegin;
  PetscAssertPointer(spRHS, 2);
  PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
  if (flg) {
    PetscCall(MatTransposeGetMat(spRHS, &Bt));
  } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");

  PetscCall(MatMumpsSetIcntl(F, 30, 1));

  if (mumps->petsc_size > 1) {
    Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
    Btseq = b->A;
  } else {
    Btseq = Bt;
  }

  PetscCall(MatGetSize(spRHS, &M, &nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (!mumps->myid) {
    PetscCall(MatSeqAIJGetArray(Btseq, &aa));
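    /* MUMPS wants the requested entries (the sparse right-hand side) centralized on the host with 1-based indices: the 1-based CSR of Bt obtained
       below serves as the compressed-column pattern of spRHS, since the rows of Bt are the columns of spRHS */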
PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3029 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3030 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3031 mumps->id.rhs_sparse = (MumpsScalar *)aa; 3032 } else { 3033 mumps->id.irhs_ptr = NULL; 3034 mumps->id.irhs_sparse = NULL; 3035 mumps->id.nz_rhs = 0; 3036 mumps->id.rhs_sparse = NULL; 3037 } 3038 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3039 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3040 3041 /* solve phase */ 3042 mumps->id.job = JOB_SOLVE; 3043 PetscMUMPS_c(mumps); 3044 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 3045 3046 if (!mumps->myid) { 3047 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3048 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3049 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3050 } 3051 PetscFunctionReturn(PETSC_SUCCESS); 3052 } 3053 3054 /*@ 3055 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 3056 3057 Logically Collective 3058 3059 Input Parameter: 3060 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3061 3062 Output Parameter: 3063 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3064 3065 Level: beginner 3066 3067 References: 3068 . * - MUMPS Users' Guide 3069 3070 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3071 @*/ 3072 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3073 { 3074 PetscFunctionBegin; 3075 PetscValidType(F, 1); 3076 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3077 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3078 PetscFunctionReturn(PETSC_SUCCESS); 3079 } 3080 3081 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3082 { 3083 Mat spRHS; 3084 3085 PetscFunctionBegin; 3086 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3087 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3088 PetscCall(MatDestroy(&spRHS)); 3089 PetscFunctionReturn(PETSC_SUCCESS); 3090 } 3091 3092 /*@ 3093 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 3094 3095 Logically Collective 3096 3097 Input Parameter: 3098 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3099 3100 Output Parameter: 3101 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3102 3103 Level: beginner 3104 3105 References: 3106 . 
* - MUMPS Users' Guide 3107 3108 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3109 @*/ 3110 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3111 { 3112 PetscBool flg; 3113 3114 PetscFunctionBegin; 3115 PetscValidType(F, 1); 3116 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3117 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3118 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3119 3120 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3121 PetscFunctionReturn(PETSC_SUCCESS); 3122 } 3123 3124 /*@ 3125 MatMumpsGetInfo - Get MUMPS parameter INFO() 3126 3127 Logically Collective 3128 3129 Input Parameters: 3130 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3131 - icntl - index of MUMPS parameter array INFO() 3132 3133 Output Parameter: 3134 . ival - value of MUMPS INFO(icntl) 3135 3136 Level: beginner 3137 3138 References: 3139 . * - MUMPS Users' Guide 3140 3141 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3142 @*/ 3143 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3144 { 3145 PetscFunctionBegin; 3146 PetscValidType(F, 1); 3147 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3148 PetscAssertPointer(ival, 3); 3149 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3150 PetscFunctionReturn(PETSC_SUCCESS); 3151 } 3152 3153 /*@ 3154 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3155 3156 Logically Collective 3157 3158 Input Parameters: 3159 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3160 - icntl - index of MUMPS parameter array INFOG() 3161 3162 Output Parameter: 3163 . ival - value of MUMPS INFOG(icntl) 3164 3165 Level: beginner 3166 3167 References: 3168 . * - MUMPS Users' Guide 3169 3170 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3171 @*/ 3172 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3173 { 3174 PetscFunctionBegin; 3175 PetscValidType(F, 1); 3176 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3177 PetscAssertPointer(ival, 3); 3178 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3179 PetscFunctionReturn(PETSC_SUCCESS); 3180 } 3181 3182 /*@ 3183 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3184 3185 Logically Collective 3186 3187 Input Parameters: 3188 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3189 - icntl - index of MUMPS parameter array RINFO() 3190 3191 Output Parameter: 3192 . val - value of MUMPS RINFO(icntl) 3193 3194 Level: beginner 3195 3196 References: 3197 . 
* - MUMPS Users' Guide 3198 3199 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3200 @*/ 3201 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3202 { 3203 PetscFunctionBegin; 3204 PetscValidType(F, 1); 3205 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3206 PetscAssertPointer(val, 3); 3207 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3208 PetscFunctionReturn(PETSC_SUCCESS); 3209 } 3210 3211 /*@ 3212 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3213 3214 Logically Collective 3215 3216 Input Parameters: 3217 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3218 - icntl - index of MUMPS parameter array RINFOG() 3219 3220 Output Parameter: 3221 . val - value of MUMPS RINFOG(icntl) 3222 3223 Level: beginner 3224 3225 References: 3226 . * - MUMPS Users' Guide 3227 3228 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3229 @*/ 3230 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3231 { 3232 PetscFunctionBegin; 3233 PetscValidType(F, 1); 3234 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3235 PetscAssertPointer(val, 3); 3236 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3237 PetscFunctionReturn(PETSC_SUCCESS); 3238 } 3239 3240 /*@ 3241 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3242 3243 Logically Collective 3244 3245 Input Parameter: 3246 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3247 3248 Output Parameters: 3249 + size - local size of the array. The size of the array is non-zero only on the host. 3250 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3251 for freeing this array. 3252 3253 Level: beginner 3254 3255 References: 3256 . * - MUMPS Users' Guide 3257 3258 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3259 @*/ 3260 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3261 { 3262 PetscFunctionBegin; 3263 PetscValidType(F, 1); 3264 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3265 PetscAssertPointer(size, 2); 3266 PetscAssertPointer(array, 3); 3267 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3268 PetscFunctionReturn(PETSC_SUCCESS); 3269 } 3270 3271 /*MC 3272 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3273 distributed and sequential matrices via the external package MUMPS. 
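  For example, to use MUMPS as the LU solver inside a `KSP` solve, one might run (a sketch; `./app` stands for your own application and any options prefix is omitted)
.vb
    mpiexec -n 4 ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_4 1
.ve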
  Works with `MATAIJ` and `MATSBAIJ` matrices

  Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

  Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
  See details below.

  Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver

  Options Database Keys:
+  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
.  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
.  -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
.  -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
.  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
.  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
                        Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
.  -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
.  -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
.  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
.  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
.  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
.  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
.  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
.  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
.  -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
.  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
.  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
.  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
.  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
.  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
.  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
.  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
.  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
.  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
.  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
.  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
.  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
.  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
.  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
.  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
.  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
-  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

  Level: beginner

  Notes:
  MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will
  error if the matrix is Hermitian.

  When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
  `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

  When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
  the failure with
.vb
          KSPGetPC(ksp,&pc);
          PCFactorGetMatrix(pc,&mat);
          MatMumpsGetInfo(mat,....);
          MatMumpsGetInfog(mat,....); etc.
.ve
  Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.

  MUMPS provides 64-bit integer support in two build modes:
  full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
  requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

  selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
  MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
  columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
  integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

  Two modes to run MUMPS/PETSc with OpenMP
.vb
   Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
   threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
.ve

.vb
   -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
   if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
.ve

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
  (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
  (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
  libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
  (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

  If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
  processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
  size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
  are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (this assignment
  might come from CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on
  those CPUs. In a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets.
  If you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind
  MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half the cores in socket 0 and half
  the cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, the
  problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
  For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
  examine the mapping result.

  PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
  for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
  calls `omp_set_num_threads`(m) internally before calling MUMPS.

  References:
+  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
-  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
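  As an illustrative sketch (one possible workflow, not the only one; it assumes `ksp` is a `KSP` created by the application and that `pc` and `F` have been declared),
  the MUMPS controls documented above can also be set from code rather than through the options database, with illustrative values
.vb
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCLU);
     PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
     PCFactorSetUpMatSolverType(pc); /* create the MUMPS factor matrix so that ICNTL/CNTL can be set on it */
     PCFactorGetMatrix(pc,&F);
     MatMumpsSetIcntl(F,24,1);       /* ICNTL(24): detection of null pivot rows */
     MatMumpsSetCntl(F,3,1.e-12);    /* CNTL(3): absolute pivoting threshold */
.ve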
3391 3392 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3393 M*/ 3394 3395 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3396 { 3397 PetscFunctionBegin; 3398 *type = MATSOLVERMUMPS; 3399 PetscFunctionReturn(PETSC_SUCCESS); 3400 } 3401 3402 /* MatGetFactor for Seq and MPI AIJ matrices */ 3403 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3404 { 3405 Mat B; 3406 Mat_MUMPS *mumps; 3407 PetscBool isSeqAIJ, isDiag, isDense; 3408 PetscMPIInt size; 3409 3410 PetscFunctionBegin; 3411 #if defined(PETSC_USE_COMPLEX) 3412 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3413 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3414 *F = NULL; 3415 PetscFunctionReturn(PETSC_SUCCESS); 3416 } 3417 #endif 3418 /* Create the factorization matrix */ 3419 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3420 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3421 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3422 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3423 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3424 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3425 PetscCall(MatSetUp(B)); 3426 3427 PetscCall(PetscNew(&mumps)); 3428 3429 B->ops->view = MatView_MUMPS; 3430 B->ops->getinfo = MatGetInfo_MUMPS; 3431 3432 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3433 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3434 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3435 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3436 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3437 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3438 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3439 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3440 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3441 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3442 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3443 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3444 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3445 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3446 3447 if (ftype == MAT_FACTOR_LU) { 3448 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3449 B->factortype = MAT_FACTOR_LU; 3450 if (isSeqAIJ) mumps->ConvertToTriples = 
MatConvertToTriples_seqaij_seqaij; 3451 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3452 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3453 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3454 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3455 mumps->sym = 0; 3456 } else { 3457 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3458 B->factortype = MAT_FACTOR_CHOLESKY; 3459 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3460 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3461 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3462 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3463 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3464 #if defined(PETSC_USE_COMPLEX) 3465 mumps->sym = 2; 3466 #else 3467 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3468 else mumps->sym = 2; 3469 #endif 3470 } 3471 3472 /* set solvertype */ 3473 PetscCall(PetscFree(B->solvertype)); 3474 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3475 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3476 if (size == 1) { 3477 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3478 B->canuseordering = PETSC_TRUE; 3479 } 3480 B->ops->destroy = MatDestroy_MUMPS; 3481 B->data = (void *)mumps; 3482 3483 *F = B; 3484 mumps->id.job = JOB_NULL; 3485 mumps->ICNTL_pre = NULL; 3486 mumps->CNTL_pre = NULL; 3487 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3488 PetscFunctionReturn(PETSC_SUCCESS); 3489 } 3490 3491 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3492 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3493 { 3494 Mat B; 3495 Mat_MUMPS *mumps; 3496 PetscBool isSeqSBAIJ; 3497 PetscMPIInt size; 3498 3499 PetscFunctionBegin; 3500 #if defined(PETSC_USE_COMPLEX) 3501 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3502 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3503 *F = NULL; 3504 PetscFunctionReturn(PETSC_SUCCESS); 3505 } 3506 #endif 3507 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3508 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3509 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3510 PetscCall(MatSetUp(B)); 3511 3512 PetscCall(PetscNew(&mumps)); 3513 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3514 if (isSeqSBAIJ) { 3515 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3516 } else { 3517 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3518 } 3519 3520 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3521 B->ops->view = MatView_MUMPS; 3522 B->ops->getinfo = MatGetInfo_MUMPS; 3523 3524 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3525 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3526 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3527 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3528 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3529 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3530 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3531 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3532 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3533 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3534 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3535 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3536 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3537 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3538 3539 B->factortype = MAT_FACTOR_CHOLESKY; 3540 #if defined(PETSC_USE_COMPLEX) 3541 mumps->sym = 2; 3542 #else 3543 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3544 else mumps->sym = 2; 3545 #endif 3546 3547 /* set solvertype */ 3548 PetscCall(PetscFree(B->solvertype)); 3549 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3550 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3551 if (size == 1) { 3552 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3553 B->canuseordering = PETSC_TRUE; 3554 } 3555 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3556 B->ops->destroy = MatDestroy_MUMPS; 3557 B->data = (void *)mumps; 3558 3559 *F = B; 3560 mumps->id.job = JOB_NULL; 3561 mumps->ICNTL_pre = NULL; 3562 mumps->CNTL_pre = NULL; 3563 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3564 PetscFunctionReturn(PETSC_SUCCESS); 3565 } 3566 3567 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3568 { 3569 Mat B; 3570 Mat_MUMPS *mumps; 3571 PetscBool isSeqBAIJ; 3572 PetscMPIInt size; 
3573 3574 PetscFunctionBegin; 3575 /* Create the factorization matrix */ 3576 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3577 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3578 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3579 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3580 PetscCall(MatSetUp(B)); 3581 3582 PetscCall(PetscNew(&mumps)); 3583 if (ftype == MAT_FACTOR_LU) { 3584 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3585 B->factortype = MAT_FACTOR_LU; 3586 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3587 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3588 mumps->sym = 0; 3589 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3590 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3591 3592 B->ops->view = MatView_MUMPS; 3593 B->ops->getinfo = MatGetInfo_MUMPS; 3594 3595 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3596 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3597 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3598 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3599 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3600 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3601 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3602 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3603 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3604 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3605 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3606 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3607 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3608 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3609 3610 /* set solvertype */ 3611 PetscCall(PetscFree(B->solvertype)); 3612 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3613 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3614 if (size == 1) { 3615 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3616 B->canuseordering = PETSC_TRUE; 3617 } 3618 B->ops->destroy = MatDestroy_MUMPS; 3619 B->data = (void *)mumps; 3620 3621 *F = B; 3622 mumps->id.job = JOB_NULL; 3623 mumps->ICNTL_pre = NULL; 3624 mumps->CNTL_pre = NULL; 3625 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3626 PetscFunctionReturn(PETSC_SUCCESS); 3627 } 3628 3629 /* MatGetFactor for Seq and MPI SELL matrices */ 3630 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3631 { 3632 Mat B; 3633 Mat_MUMPS *mumps; 3634 PetscBool isSeqSELL; 3635 PetscMPIInt size; 3636 
3637 PetscFunctionBegin; 3638 /* Create the factorization matrix */ 3639 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3640 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3641 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3642 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3643 PetscCall(MatSetUp(B)); 3644 3645 PetscCall(PetscNew(&mumps)); 3646 3647 B->ops->view = MatView_MUMPS; 3648 B->ops->getinfo = MatGetInfo_MUMPS; 3649 3650 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3651 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3652 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3653 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3654 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3655 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3656 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3657 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3658 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3659 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3660 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3661 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3662 3663 if (ftype == MAT_FACTOR_LU) { 3664 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3665 B->factortype = MAT_FACTOR_LU; 3666 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3667 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3668 mumps->sym = 0; 3669 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3670 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3671 3672 /* set solvertype */ 3673 PetscCall(PetscFree(B->solvertype)); 3674 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3675 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3676 if (size == 1) { 3677 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3678 B->canuseordering = PETSC_TRUE; 3679 } 3680 B->ops->destroy = MatDestroy_MUMPS; 3681 B->data = (void *)mumps; 3682 3683 *F = B; 3684 mumps->id.job = JOB_NULL; 3685 mumps->ICNTL_pre = NULL; 3686 mumps->CNTL_pre = NULL; 3687 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3688 PetscFunctionReturn(PETSC_SUCCESS); 3689 } 3690 3691 /* MatGetFactor for MATNEST matrices */ 3692 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3693 { 3694 Mat B, **mats; 3695 Mat_MUMPS *mumps; 3696 PetscInt nr, nc; 3697 PetscMPIInt size; 3698 PetscBool flg = PETSC_TRUE; 3699 3700 PetscFunctionBegin; 3701 #if defined(PETSC_USE_COMPLEX) 3702 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3703 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not 

/* MatGetFactor for MATNEST matrices */
static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B, **mats;
  Mat_MUMPS  *mumps;
  PetscInt    nr, nc;
  PetscMPIInt size;
  PetscBool   flg = PETSC_TRUE;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif

  /* Return if some condition is not satisfied */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
    /* reject Cholesky if the row index ranges of any two nest blocks intersect */
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag, isDense;

      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
      if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
      else {
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
      }
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
            PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
          PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
        PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym               = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
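
/*
   Note on the MATNEST path above (editorial summary of the checks in MatGetFactor_nest_mumps): a
   factor is produced only when every block, after unwrapping (Hermitian) transpose wrappers, is of
   AIJ, BAIJ, DIAGONAL or DENSE type (with SBAIJ also allowed on the diagonal for Cholesky). For
   MAT_FACTOR_CHOLESKY the nest must additionally be square, with identical row and column index sets
   per block whose ranges do not overlap across blocks. Otherwise *F is left NULL and the reason is
   reported through PetscInfo(), viewable with -info, e.g. when requesting

     -pc_type cholesky -pc_factor_mat_solver_type mumps

   on an unsupported nest layout.
*/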

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
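
/*
   Usage sketch (editorial addition, illustrative only; ksp, pc, A, b, x are assumed to exist with
   matching layouts). Once the registrations above have run, typically from MatSolverTypeRegisterAll()
   when PETSc is built with MUMPS, the solver can be selected from the command line,

     -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 2

   or programmatically through the factored-matrix interface composed in the factories above:

     Mat F;

     PetscCall(KSPSetType(ksp, KSPPREONLY));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
     PetscCall(PCFactorSetUpMatSolverType(pc)); // create the factor matrix so MUMPS options can be set
     PetscCall(PCFactorGetMatrix(pc, &F));
     PetscCall(MatMumpsSetIcntl(F, 7, 2));      // e.g. request the AMF ordering inside MUMPS
     PetscCall(KSPSolve(ksp, b, x));
*/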