1 /* 2 Provides an interface to the MUMPS sparse solver 3 */ 4 #include <petscpkg_version.h> 5 #include <petscsf.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 11 12 EXTERN_C_BEGIN 13 #if defined(PETSC_USE_COMPLEX) 14 #if defined(PETSC_USE_REAL_SINGLE) 15 #include <cmumps_c.h> 16 #else 17 #include <zmumps_c.h> 18 #endif 19 #else 20 #if defined(PETSC_USE_REAL_SINGLE) 21 #include <smumps_c.h> 22 #else 23 #include <dmumps_c.h> 24 #endif 25 #endif 26 EXTERN_C_END 27 #define JOB_INIT -1 28 #define JOB_NULL 0 29 #define JOB_FACTSYMBOLIC 1 30 #define JOB_FACTNUMERIC 2 31 #define JOB_SOLVE 3 32 #define JOB_END -2 33 34 /* calls to MUMPS */ 35 #if defined(PETSC_USE_COMPLEX) 36 #if defined(PETSC_USE_REAL_SINGLE) 37 #define MUMPS_c cmumps_c 38 #else 39 #define MUMPS_c zmumps_c 40 #endif 41 #else 42 #if defined(PETSC_USE_REAL_SINGLE) 43 #define MUMPS_c smumps_c 44 #else 45 #define MUMPS_c dmumps_c 46 #endif 47 #endif 48 49 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 50 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 51 naming convention in PetscMPIInt, PetscBLASInt etc. 
52 */ 53 typedef MUMPS_INT PetscMUMPSInt; 54 55 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 56 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 57 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 58 #endif 59 #else 60 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 61 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 62 #endif 63 #endif 64 65 #define MPIU_MUMPSINT MPI_INT 66 #define PETSC_MUMPS_INT_MAX 2147483647 67 #define PETSC_MUMPS_INT_MIN -2147483648 68 69 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 70 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 71 { 72 PetscFunctionBegin; 73 #if PetscDefined(USE_64BIT_INDICES) 74 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 75 #endif 76 *b = (PetscMUMPSInt)(a); 77 PetscFunctionReturn(PETSC_SUCCESS); 78 } 79 80 /* Put these utility routines here since they are only used in this file */ 81 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 82 { 83 PetscInt myval; 84 PetscBool myset; 85 PetscFunctionBegin; 86 /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 87 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 88 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 89 if (set) *set = myset; 90 PetscFunctionReturn(PETSC_SUCCESS); 91 } 92 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 93 94 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 95 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 96 #define PetscMUMPS_c(mumps) \ 97 do { \ 98 if (mumps->use_petsc_omp_support) { \ 99 if (mumps->is_omp_master) { \ 100 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 101 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 102 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 103 PetscCall(PetscFPTrapPop()); \ 104 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 105 } \ 106 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 107 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 108 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 109 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 110 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. 
\ 111 */ \ 112 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 113 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 114 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 115 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 116 } else { \ 117 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 118 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 119 PetscCall(PetscFPTrapPop()); \ 120 } \ 121 } while (0) 122 #else 123 #define PetscMUMPS_c(mumps) \ 124 do { \ 125 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 126 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 127 PetscCall(PetscFPTrapPop()); \ 128 } while (0) 129 #endif 130 131 /* declare MumpsScalar */ 132 #if defined(PETSC_USE_COMPLEX) 133 #if defined(PETSC_USE_REAL_SINGLE) 134 #define MumpsScalar mumps_complex 135 #else 136 #define MumpsScalar mumps_double_complex 137 #endif 138 #else 139 #define MumpsScalar PetscScalar 140 #endif 141 142 /* macros s.t. indices match MUMPS documentation */ 143 #define ICNTL(I) icntl[(I)-1] 144 #define CNTL(I) cntl[(I)-1] 145 #define INFOG(I) infog[(I)-1] 146 #define INFO(I) info[(I)-1] 147 #define RINFOG(I) rinfog[(I)-1] 148 #define RINFO(I) rinfo[(I)-1] 149 150 typedef struct Mat_MUMPS Mat_MUMPS; 151 struct Mat_MUMPS { 152 #if defined(PETSC_USE_COMPLEX) 153 #if defined(PETSC_USE_REAL_SINGLE) 154 CMUMPS_STRUC_C id; 155 #else 156 ZMUMPS_STRUC_C id; 157 #endif 158 #else 159 #if defined(PETSC_USE_REAL_SINGLE) 160 SMUMPS_STRUC_C id; 161 #else 162 DMUMPS_STRUC_C id; 163 #endif 164 #endif 165 166 MatStructure matstruc; 167 PetscMPIInt myid, petsc_size; 168 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. 
*/ 169 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 170 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 171 PetscMUMPSInt sym; 172 MPI_Comm mumps_comm; 173 PetscMUMPSInt *ICNTL_pre; 174 PetscReal *CNTL_pre; 175 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 176 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 177 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 178 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 179 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 180 PetscInt *rhs_nrow, max_nrhs; 181 PetscMPIInt *rhs_recvcounts, *rhs_disps; 182 PetscScalar *rhs_loc, *rhs_recvbuf; 183 #endif 184 Vec b_seq, x_seq; 185 PetscInt ninfo, *info; /* which INFO to display */ 186 PetscInt sizeredrhs; 187 PetscScalar *schur_sol; 188 PetscInt schur_sizesol; 189 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 190 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 191 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 192 193 /* Support for MATNEST */ 194 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 195 PetscInt64 *nest_vals_start; 196 PetscScalar *nest_vals; 197 198 /* stuff used by petsc/mumps OpenMP support*/ 199 PetscBool use_petsc_omp_support; 200 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 201 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 202 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 203 PetscMPIInt tag, omp_comm_size; 204 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 205 MPI_Request *reqs; 206 }; 207 208 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt 
to a CSR of type PetscMUMPSInt. 209 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 210 */ 211 static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 212 { 213 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 214 215 PetscFunctionBegin; 216 #if defined(PETSC_USE_64BIT_INDICES) 217 { 218 PetscInt i; 219 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 220 PetscCall(PetscFree(mumps->ia_alloc)); 221 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 222 mumps->cur_ilen = nrow + 1; 223 } 224 if (nnz > mumps->cur_jlen) { 225 PetscCall(PetscFree(mumps->ja_alloc)); 226 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 227 mumps->cur_jlen = nnz; 228 } 229 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 230 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 231 *ia_mumps = mumps->ia_alloc; 232 *ja_mumps = mumps->ja_alloc; 233 } 234 #else 235 *ia_mumps = ia; 236 *ja_mumps = ja; 237 #endif 238 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 243 { 244 PetscFunctionBegin; 245 PetscCall(PetscFree(mumps->id.listvar_schur)); 246 PetscCall(PetscFree(mumps->id.redrhs)); 247 PetscCall(PetscFree(mumps->schur_sol)); 248 mumps->id.size_schur = 0; 249 mumps->id.schur_lld = 0; 250 mumps->id.ICNTL(19) = 0; 251 PetscFunctionReturn(PETSC_SUCCESS); 252 } 253 254 /* solve with rhs in mumps->id.redrhs and return in the same location */ 255 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 256 { 257 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 258 Mat S, B, X; 259 MatFactorSchurStatus schurstatus; 260 PetscInt 
sizesol; 261 262 PetscFunctionBegin; 263 PetscCall(MatFactorFactorizeSchurComplement(F)); 264 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 265 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 266 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 267 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 268 PetscCall(MatBindToCPU(B, S->boundtocpu)); 269 #endif 270 switch (schurstatus) { 271 case MAT_FACTOR_SCHUR_FACTORED: 272 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 273 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 274 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 275 PetscCall(MatBindToCPU(X, S->boundtocpu)); 276 #endif 277 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 278 PetscCall(MatMatSolveTranspose(S, B, X)); 279 } else { 280 PetscCall(MatMatSolve(S, B, X)); 281 } 282 break; 283 case MAT_FACTOR_SCHUR_INVERTED: 284 sizesol = mumps->id.nrhs * mumps->id.size_schur; 285 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 286 PetscCall(PetscFree(mumps->schur_sol)); 287 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 288 mumps->schur_sizesol = sizesol; 289 } 290 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 291 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 292 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 293 PetscCall(MatBindToCPU(X, S->boundtocpu)); 294 #endif 295 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 296 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 297 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 298 } else { 299 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 300 } 301 PetscCall(MatProductSetFromOptions(X)); 302 PetscCall(MatProductSymbolic(X)); 303 PetscCall(MatProductNumeric(X)); 304 305 PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 306 
break; 307 default: 308 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 309 } 310 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 311 PetscCall(MatDestroy(&B)); 312 PetscCall(MatDestroy(&X)); 313 PetscFunctionReturn(PETSC_SUCCESS); 314 } 315 316 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 317 { 318 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 319 320 PetscFunctionBegin; 321 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 if (!expansion) { /* prepare for the condensation step */ 325 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 326 /* allocate MUMPS internal array to store reduced right-hand sides */ 327 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 328 PetscCall(PetscFree(mumps->id.redrhs)); 329 mumps->id.lredrhs = mumps->id.size_schur; 330 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 331 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 332 } 333 } else { /* prepare for the expansion step */ 334 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 335 PetscCall(MatMumpsSolveSchur_Private(F)); 336 mumps->id.ICNTL(26) = 2; /* expansion phase */ 337 PetscMUMPS_c(mumps); 338 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 339 /* restore defaults */ 340 mumps->id.ICNTL(26) = -1; 341 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 342 if (mumps->id.nrhs > 1) { 343 PetscCall(PetscFree(mumps->id.redrhs)); 344 mumps->id.lredrhs = 0; 345 mumps->sizeredrhs = 0; 346 } 347 } 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 /* 352 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 353 354 
input: 355 A - matrix in aij,baij or sbaij format 356 shift - 0: C style output triple; 1: Fortran style output triple. 357 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 358 MAT_REUSE_MATRIX: only the values in v array are updated 359 output: 360 nnz - dim of r, c, and v (number of local nonzero entries of A) 361 r, c, v - row and col index, matrix values (matrix triples) 362 363 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 364 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 365 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 366 367 */ 368 369 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 370 { 371 const PetscScalar *av; 372 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 373 PetscInt64 nz, rnz, i, j, k; 374 PetscMUMPSInt *row, *col; 375 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 376 377 PetscFunctionBegin; 378 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 379 if (reuse == MAT_INITIAL_MATRIX) { 380 nz = aa->nz; 381 ai = aa->i; 382 aj = aa->j; 383 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 384 for (i = k = 0; i < M; i++) { 385 rnz = ai[i + 1] - ai[i]; 386 ajj = aj + ai[i]; 387 for (j = 0; j < rnz; j++) { 388 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 389 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 390 k++; 391 } 392 } 393 mumps->val = (PetscScalar *)av; 394 mumps->irn = row; 395 mumps->jcn = col; 396 mumps->nnz = nz; 397 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */ 398 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 399 
PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 404 { 405 PetscInt64 nz, i, j, k, r; 406 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 407 PetscMUMPSInt *row, *col; 408 409 PetscFunctionBegin; 410 nz = a->sliidx[a->totalslices]; 411 if (reuse == MAT_INITIAL_MATRIX) { 412 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 413 for (i = k = 0; i < a->totalslices; i++) { 414 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 415 } 416 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 417 mumps->irn = row; 418 mumps->jcn = col; 419 mumps->nnz = nz; 420 mumps->val = a->val; 421 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */ 422 else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 427 { 428 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 429 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 430 PetscInt64 M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 431 PetscInt bs; 432 PetscMUMPSInt *row, *col; 433 434 PetscFunctionBegin; 435 if (reuse == MAT_INITIAL_MATRIX) { 436 PetscCall(MatGetBlockSize(A, &bs)); 437 M = A->rmap->N / bs; 438 ai = aa->i; 439 aj = aa->j; 440 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 441 for (i = 0; i < M; i++) { 442 ajj = aj + ai[i]; 443 rnz = ai[i + 1] - ai[i]; 444 for (k = 0; k < rnz; k++) { 445 for (j = 0; j < bs; j++) { 446 for (m = 0; m < bs; m++) { 447 
PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 448 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 449 idx++; 450 } 451 } 452 } 453 } 454 mumps->irn = row; 455 mumps->jcn = col; 456 mumps->nnz = nz; 457 mumps->val = aa->a; 458 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 459 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 460 PetscFunctionReturn(PETSC_SUCCESS); 461 } 462 463 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 464 { 465 const PetscInt *ai, *aj, *ajj; 466 PetscInt bs; 467 PetscInt64 nz, rnz, i, j, k, m; 468 PetscMUMPSInt *row, *col; 469 PetscScalar *val; 470 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 471 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 472 #if defined(PETSC_USE_COMPLEX) 473 PetscBool isset, hermitian; 474 #endif 475 476 PetscFunctionBegin; 477 #if defined(PETSC_USE_COMPLEX) 478 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 479 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 480 #endif 481 ai = aa->i; 482 aj = aa->j; 483 PetscCall(MatGetBlockSize(A, &bs)); 484 if (reuse == MAT_INITIAL_MATRIX) { 485 const PetscInt64 alloc_size = aa->nz * bs2; 486 487 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 488 if (bs > 1) { 489 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 490 mumps->val = mumps->val_alloc; 491 } else { 492 mumps->val = aa->a; 493 } 494 mumps->irn = row; 495 mumps->jcn = col; 496 } else { 497 row = mumps->irn; 498 col = mumps->jcn; 499 } 500 val = mumps->val; 501 502 nz = 0; 503 if (bs > 1) { 504 for (i = 0; i < mbs; i++) { 505 rnz = ai[i + 1] - ai[i]; 
506 ajj = aj + ai[i]; 507 for (j = 0; j < rnz; j++) { 508 for (k = 0; k < bs; k++) { 509 for (m = 0; m < bs; m++) { 510 if (ajj[j] > i || k >= m) { 511 if (reuse == MAT_INITIAL_MATRIX) { 512 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 513 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 514 } 515 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 516 } 517 } 518 } 519 } 520 } 521 } else if (reuse == MAT_INITIAL_MATRIX) { 522 for (i = 0; i < mbs; i++) { 523 rnz = ai[i + 1] - ai[i]; 524 ajj = aj + ai[i]; 525 for (j = 0; j < rnz; j++) { 526 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 527 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 528 nz++; 529 } 530 } 531 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 532 } else if (mumps->nest_vals) 533 PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 534 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 535 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 536 PetscFunctionReturn(PETSC_SUCCESS); 537 } 538 539 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 540 { 541 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 542 PetscInt64 nz, rnz, i, j; 543 const PetscScalar *av, *v1; 544 PetscScalar *val; 545 PetscMUMPSInt *row, *col; 546 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 547 PetscBool missing; 548 #if defined(PETSC_USE_COMPLEX) 549 PetscBool hermitian, isset; 550 #endif 551 552 PetscFunctionBegin; 553 #if defined(PETSC_USE_COMPLEX) 554 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 555 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), 
PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 556 #endif 557 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 558 ai = aa->i; 559 aj = aa->j; 560 adiag = aa->diag; 561 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 562 if (reuse == MAT_INITIAL_MATRIX) { 563 /* count nz in the upper triangular part of A */ 564 nz = 0; 565 if (missing) { 566 for (i = 0; i < M; i++) { 567 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 568 for (j = ai[i]; j < ai[i + 1]; j++) { 569 if (aj[j] < i) continue; 570 nz++; 571 } 572 } else { 573 nz += ai[i + 1] - adiag[i]; 574 } 575 } 576 } else { 577 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 578 } 579 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 580 PetscCall(PetscMalloc1(nz, &val)); 581 mumps->nnz = nz; 582 mumps->irn = row; 583 mumps->jcn = col; 584 mumps->val = mumps->val_alloc = val; 585 586 nz = 0; 587 if (missing) { 588 for (i = 0; i < M; i++) { 589 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 590 for (j = ai[i]; j < ai[i + 1]; j++) { 591 if (aj[j] < i) continue; 592 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 593 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 594 val[nz] = av[j]; 595 nz++; 596 } 597 } else { 598 rnz = ai[i + 1] - adiag[i]; 599 ajj = aj + adiag[i]; 600 v1 = av + adiag[i]; 601 for (j = 0; j < rnz; j++) { 602 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 603 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 604 val[nz++] = v1[j]; 605 } 606 } 607 } 608 } else { 609 for (i = 0; i < M; i++) { 610 rnz = ai[i + 1] - adiag[i]; 611 ajj = aj + adiag[i]; 612 v1 = av + adiag[i]; 613 for (j = 0; j < rnz; j++) { 614 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 615 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 616 val[nz++] = v1[j]; 617 } 618 } 619 } 620 } else { 621 nz = 0; 622 val = mumps->val; 623 if (missing) { 624 for (i = 0; i < M; i++) { 625 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 626 for (j = ai[i]; j < ai[i + 1]; j++) { 627 if (aj[j] 
< i) continue; 628 val[nz++] = av[j]; 629 } 630 } else { 631 rnz = ai[i + 1] - adiag[i]; 632 v1 = av + adiag[i]; 633 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 634 } 635 } 636 } else { 637 for (i = 0; i < M; i++) { 638 rnz = ai[i + 1] - adiag[i]; 639 v1 = av + adiag[i]; 640 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 641 } 642 } 643 } 644 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 645 PetscFunctionReturn(PETSC_SUCCESS); 646 } 647 648 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 649 { 650 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 651 PetscInt bs; 652 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 653 PetscMUMPSInt *row, *col; 654 const PetscScalar *av, *bv, *v1, *v2; 655 PetscScalar *val; 656 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 657 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 658 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 659 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 660 #if defined(PETSC_USE_COMPLEX) 661 PetscBool hermitian, isset; 662 #endif 663 664 PetscFunctionBegin; 665 #if defined(PETSC_USE_COMPLEX) 666 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 667 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 668 #endif 669 PetscCall(MatGetBlockSize(A, &bs)); 670 rstart = A->rmap->rstart; 671 ai = aa->i; 672 aj = aa->j; 673 bi = bb->i; 674 bj = bb->j; 675 av = aa->a; 676 bv = bb->a; 677 678 garray = mat->garray; 679 680 if (reuse == MAT_INITIAL_MATRIX) { 681 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 682 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 683 PetscCall(PetscMalloc1(nz, &val)); 684 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 685 mumps->irn = row; 686 mumps->jcn = col; 687 mumps->val = mumps->val_alloc = val; 688 } else { 689 val = mumps->val; 690 } 691 692 jj = 0; 693 irow = 
rstart; 694 for (i = 0; i < mbs; i++) { 695 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 696 countA = ai[i + 1] - ai[i]; 697 countB = bi[i + 1] - bi[i]; 698 bjj = bj + bi[i]; 699 v1 = av + ai[i] * bs2; 700 v2 = bv + bi[i] * bs2; 701 702 if (bs > 1) { 703 /* A-part */ 704 for (j = 0; j < countA; j++) { 705 for (k = 0; k < bs; k++) { 706 for (m = 0; m < bs; m++) { 707 if (rstart + ajj[j] * bs > irow || k >= m) { 708 if (reuse == MAT_INITIAL_MATRIX) { 709 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 710 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 711 } 712 val[jj++] = v1[j * bs2 + m + k * bs]; 713 } 714 } 715 } 716 } 717 718 /* B-part */ 719 for (j = 0; j < countB; j++) { 720 for (k = 0; k < bs; k++) { 721 for (m = 0; m < bs; m++) { 722 if (reuse == MAT_INITIAL_MATRIX) { 723 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 724 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 725 } 726 val[jj++] = v2[j * bs2 + m + k * bs]; 727 } 728 } 729 } 730 } else { 731 /* A-part */ 732 for (j = 0; j < countA; j++) { 733 if (reuse == MAT_INITIAL_MATRIX) { 734 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 735 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 736 } 737 val[jj++] = v1[j]; 738 } 739 740 /* B-part */ 741 for (j = 0; j < countB; j++) { 742 if (reuse == MAT_INITIAL_MATRIX) { 743 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 744 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 745 } 746 val[jj++] = v2[j]; 747 } 748 } 749 irow += bs; 750 } 751 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 752 PetscFunctionReturn(PETSC_SUCCESS); 753 } 754 755 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 756 { 757 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 758 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 759 PetscMUMPSInt *row, *col; 760 const 
PetscScalar *av, *bv, *v1, *v2; 761 PetscScalar *val; 762 Mat Ad, Ao; 763 Mat_SeqAIJ *aa; 764 Mat_SeqAIJ *bb; 765 766 PetscFunctionBegin; 767 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 768 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 769 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 770 771 aa = (Mat_SeqAIJ *)(Ad)->data; 772 bb = (Mat_SeqAIJ *)(Ao)->data; 773 ai = aa->i; 774 aj = aa->j; 775 bi = bb->i; 776 bj = bb->j; 777 778 rstart = A->rmap->rstart; 779 cstart = A->cmap->rstart; 780 781 if (reuse == MAT_INITIAL_MATRIX) { 782 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 783 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 784 PetscCall(PetscMalloc1(nz, &val)); 785 mumps->nnz = nz; 786 mumps->irn = row; 787 mumps->jcn = col; 788 mumps->val = mumps->val_alloc = val; 789 } else { 790 val = mumps->val; 791 } 792 793 jj = 0; 794 irow = rstart; 795 for (i = 0; i < m; i++) { 796 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 797 countA = ai[i + 1] - ai[i]; 798 countB = bi[i + 1] - bi[i]; 799 bjj = bj + bi[i]; 800 v1 = av + ai[i]; 801 v2 = bv + bi[i]; 802 803 /* A-part */ 804 for (j = 0; j < countA; j++) { 805 if (reuse == MAT_INITIAL_MATRIX) { 806 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 807 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 808 } 809 val[jj++] = v1[j]; 810 } 811 812 /* B-part */ 813 for (j = 0; j < countB; j++) { 814 if (reuse == MAT_INITIAL_MATRIX) { 815 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 816 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 817 } 818 val[jj++] = v2[j]; 819 } 820 irow++; 821 } 822 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 823 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 824 PetscFunctionReturn(PETSC_SUCCESS); 825 } 826 827 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 828 { 829 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 830 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ 
*)(mat->A)->data; 831 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 832 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 833 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 834 const PetscInt bs2 = mat->bs2; 835 PetscInt bs; 836 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 837 PetscMUMPSInt *row, *col; 838 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 839 PetscScalar *val; 840 841 PetscFunctionBegin; 842 PetscCall(MatGetBlockSize(A, &bs)); 843 if (reuse == MAT_INITIAL_MATRIX) { 844 nz = bs2 * (aa->nz + bb->nz); 845 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 846 PetscCall(PetscMalloc1(nz, &val)); 847 mumps->nnz = nz; 848 mumps->irn = row; 849 mumps->jcn = col; 850 mumps->val = mumps->val_alloc = val; 851 } else { 852 val = mumps->val; 853 } 854 855 jj = 0; 856 irow = rstart; 857 for (i = 0; i < mbs; i++) { 858 countA = ai[i + 1] - ai[i]; 859 countB = bi[i + 1] - bi[i]; 860 ajj = aj + ai[i]; 861 bjj = bj + bi[i]; 862 v1 = av + bs2 * ai[i]; 863 v2 = bv + bs2 * bi[i]; 864 865 idx = 0; 866 /* A-part */ 867 for (k = 0; k < countA; k++) { 868 for (j = 0; j < bs; j++) { 869 for (n = 0; n < bs; n++) { 870 if (reuse == MAT_INITIAL_MATRIX) { 871 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 872 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 873 } 874 val[jj++] = v1[idx++]; 875 } 876 } 877 } 878 879 idx = 0; 880 /* B-part */ 881 for (k = 0; k < countB; k++) { 882 for (j = 0; j < bs; j++) { 883 for (n = 0; n < bs; n++) { 884 if (reuse == MAT_INITIAL_MATRIX) { 885 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 886 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 887 } 888 val[jj++] = v2[idx++]; 889 } 890 } 891 } 892 irow += bs; 893 } 894 PetscFunctionReturn(PETSC_SUCCESS); 895 } 896 897 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS 
*mumps) 898 { 899 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 900 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 901 PetscMUMPSInt *row, *col; 902 const PetscScalar *av, *bv, *v1, *v2; 903 PetscScalar *val; 904 Mat Ad, Ao; 905 Mat_SeqAIJ *aa; 906 Mat_SeqAIJ *bb; 907 #if defined(PETSC_USE_COMPLEX) 908 PetscBool hermitian, isset; 909 #endif 910 911 PetscFunctionBegin; 912 #if defined(PETSC_USE_COMPLEX) 913 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 914 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 915 #endif 916 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 917 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 918 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 919 920 aa = (Mat_SeqAIJ *)(Ad)->data; 921 bb = (Mat_SeqAIJ *)(Ao)->data; 922 ai = aa->i; 923 aj = aa->j; 924 adiag = aa->diag; 925 bi = bb->i; 926 bj = bb->j; 927 928 rstart = A->rmap->rstart; 929 930 if (reuse == MAT_INITIAL_MATRIX) { 931 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 932 nzb = 0; /* num of upper triangular entries in mat->B */ 933 for (i = 0; i < m; i++) { 934 nza += (ai[i + 1] - adiag[i]); 935 countB = bi[i + 1] - bi[i]; 936 bjj = bj + bi[i]; 937 for (j = 0; j < countB; j++) { 938 if (garray[bjj[j]] > rstart) nzb++; 939 } 940 } 941 942 nz = nza + nzb; /* total nz of upper triangular part of mat */ 943 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 944 PetscCall(PetscMalloc1(nz, &val)); 945 mumps->nnz = nz; 946 mumps->irn = row; 947 mumps->jcn = col; 948 mumps->val = mumps->val_alloc = val; 949 } else { 950 val = mumps->val; 951 } 952 953 jj = 0; 954 irow = rstart; 955 for (i = 0; i < m; i++) { 956 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 957 v1 = av + adiag[i]; 958 countA = ai[i + 1] - adiag[i]; 959 countB = bi[i + 1] - bi[i]; 960 bjj = bj + bi[i]; 961 v2 = bv + 
bi[i]; 962 963 /* A-part */ 964 for (j = 0; j < countA; j++) { 965 if (reuse == MAT_INITIAL_MATRIX) { 966 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 967 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 968 } 969 val[jj++] = v1[j]; 970 } 971 972 /* B-part */ 973 for (j = 0; j < countB; j++) { 974 if (garray[bjj[j]] > rstart) { 975 if (reuse == MAT_INITIAL_MATRIX) { 976 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 977 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 978 } 979 val[jj++] = v2[j]; 980 } 981 } 982 irow++; 983 } 984 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 985 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 986 PetscFunctionReturn(PETSC_SUCCESS); 987 } 988 989 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 990 { 991 const PetscScalar *av; 992 const PetscInt M = A->rmap->n; 993 PetscInt64 i; 994 PetscMUMPSInt *row, *col; 995 Vec v; 996 997 PetscFunctionBegin; 998 PetscCall(MatDiagonalGetDiagonal(A, &v)); 999 PetscCall(VecGetArrayRead(v, &av)); 1000 if (reuse == MAT_INITIAL_MATRIX) { 1001 PetscCall(PetscMalloc2(M, &row, M, &col)); 1002 for (i = 0; i < M; i++) { 1003 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1004 col[i] = row[i]; 1005 } 1006 mumps->val = (PetscScalar *)av; 1007 mumps->irn = row; 1008 mumps->jcn = col; 1009 mumps->nnz = M; 1010 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1011 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1012 PetscCall(VecRestoreArrayRead(v, &av)); 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, 
Mat_MUMPS *mumps) 1017 { 1018 PetscScalar *v; 1019 const PetscInt m = A->rmap->n, N = A->cmap->N; 1020 PetscInt lda; 1021 PetscInt64 i, j; 1022 PetscMUMPSInt *row, *col; 1023 1024 PetscFunctionBegin; 1025 PetscCall(MatDenseGetArray(A, &v)); 1026 PetscCall(MatDenseGetLDA(A, &lda)); 1027 if (reuse == MAT_INITIAL_MATRIX) { 1028 PetscCall(PetscMalloc2(m * N, &row, m * N, &col)); 1029 for (i = 0; i < m; i++) { 1030 col[i] = 0; 1031 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1032 } 1033 for (j = 1; j < N; j++) { 1034 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j)); 1035 PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m)); 1036 } 1037 if (lda == m) mumps->val = v; 1038 else { 1039 PetscCall(PetscMalloc1(m * N, &mumps->val)); 1040 mumps->val_alloc = mumps->val; 1041 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1042 } 1043 mumps->irn = row; 1044 mumps->jcn = col; 1045 mumps->nnz = m * N; 1046 } else { 1047 if (lda == m && !mumps->nest_vals) mumps->val = v; 1048 else { 1049 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1050 } 1051 } 1052 PetscCall(MatDenseRestoreArray(A, &v)); 1053 PetscFunctionReturn(PETSC_SUCCESS); 1054 } 1055 1056 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1057 { 1058 Mat **mats; 1059 PetscInt nr, nc; 1060 PetscBool chol = mumps->sym ? 
PETSC_TRUE : PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscInt64     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    /* First pass: pick a conversion routine for each (present) sub-block and
       accumulate the total/maximum local nonzero counts */
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag, isDense;
          MatInfo   info;

          /* peel off (Hermitian-)transpose wrappers; the row/column swap they imply is handled in the second pass */
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
          if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
          else {
            PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
            if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
          }
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
          PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

          if (chol) {
            if (r == c) {
              /* diagonal blocks of a Cholesky factorization: only their upper triangle is extracted */
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            } else {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); /* scratch to widen PetscMUMPSInt column indices to PetscInt */
    else (void)maxnnz;

    /* Second pass: convert each block, then remap its local COO indices into the
       global ordering of the nest */
    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub  = mats[r][c];
        const PetscInt *ridx = rows_idx[r];
        const PetscInt *cidx = cols_idx[c];
        PetscInt        rst;
        PetscSF         csf;
        PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
        PetscLayout     cmap;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        swap = (PetscBool)(isTrans || isHTrans);

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
        if (isHTrans) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]);
        }

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    PetscScalar *oval = mumps->nest_vals;
    /* Reuse: refresh each block's values in place, offset by nest_vals_start[] */
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool isTrans, isHTrans = PETSC_FALSE;
        Mat       sub  = mats[r][c];
        PetscInt  midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (isHTrans) { /* conjugate the refreshed entries of Hermitian-transposed blocks */
          PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
        }
      }
    }
    mumps->val = oval;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release every resource held by a MUMPS factor matrix and terminate the MUMPS
   instance (JOB_END), then clear the composed function pointers */
static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      /* the communicator was either duplicated for the OMP path or borrowed from PETSc; release it accordingly */
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor.
*/
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* no OMP aggregation: each rank passes its own rows straight to MUMPS */
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar *)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    /* OMP aggregation: gather the RHS rows of all ranks in the omp communicator onto the omp master */
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        /* translate omp ranks to petsc ranks so we can look up each member's ownership range */
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A x = b with the computed MUMPS factors (JOB_SOLVE), handling distributed or
   centralized RHS and the optional Schur-complement condensation/expansion steps */
static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
                                   "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
                                   &cite2));

  if (A->factorerrortype) {
    /* factorization failed earlier: report and return an Inf solution instead of calling MUMPS */
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscCall(VecSetInf(x));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b, &rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq, &array));
        mumps->id.rhs = (MumpsScalar *)array;
      }
    }
  } else { /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    PetscCall(VecCopy(b, x)); /* MUMPS overwrites the RHS in place, so solve directly in x */
    PetscCall(VecGetArray(x, &array));
    mumps->id.rhs = (MumpsScalar *)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
      mumps->id.ICNTL(26) = 1; /* condensation phase */
    } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }
  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
  else if (mumps->id.ICNTL(26) == 1) {
    /* copy the reduced (Schur) solution back into the full solution array at the Schur variable positions */
    PetscCall(MatMumpsSolveSchur_Private(A));
    for (i = 0; i < mumps->id.size_schur; ++i) {
#if !defined(PETSC_USE_COMPLEX)
      PetscScalar val = mumps->id.redrhs[i];
#else
      PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i;
#endif
      array[mumps->id.listvar_schur[i] - 1] = val;
    }
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
  }

  /* release whichever array was acquired in the RHS setup above */
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      PetscCall(VecRestoreArrayRead(b, &rarray));
    } else if (!mumps->myid) {
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else PetscCall(VecRestoreArray(x, &array));

  PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve the transposed system by temporarily setting ICNTL(9) to 0 around MatSolve_MUMPS()
   (any value other than 1 requests the transposed solve in MUMPS), restoring it afterwards */
static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
  const PetscMUMPSInt value = mumps->id.ICNTL(9);

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatSolve_MUMPS(A, b, x));
  mumps->id.ICNTL(9) = value;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL;
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save;
  PetscScalar       *bray, *sol_loc, *sol_loc_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc;
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
    if (flgT) { /* input B is transpose of actual RHS matrix,
1589 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1590 PetscCall(MatTransposeGetMat(B, &Bt)); 1591 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1592 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1593 } 1594 1595 PetscCall(MatGetSize(B, &M, &nrhs)); 1596 mumps->id.nrhs = nrhs; 1597 mumps->id.lrhs = M; 1598 mumps->id.rhs = NULL; 1599 1600 if (mumps->petsc_size == 1) { 1601 PetscScalar *aa; 1602 PetscInt spnr, *ia, *ja; 1603 PetscBool second_solve = PETSC_FALSE; 1604 1605 PetscCall(MatDenseGetArray(X, &array)); 1606 mumps->id.rhs = (MumpsScalar *)array; 1607 1608 if (denseB) { 1609 /* copy B to X */ 1610 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1611 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1612 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1613 } else { /* sparse B */ 1614 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1615 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1616 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1617 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1618 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1619 } 1620 /* handle condensation step of Schur complement (if any) */ 1621 if (mumps->id.size_schur > 0) { 1622 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1623 second_solve = PETSC_TRUE; 1624 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1625 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1626 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1627 } 1628 /* solve phase */ 1629 mumps->id.job = JOB_SOLVE; 1630 PetscMUMPS_c(mumps); 1631 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1632 1633 /* 
handle expansion step of Schur complement (if any) */ 1634 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1635 else if (mumps->id.ICNTL(26) == 1) { 1636 PetscCall(MatMumpsSolveSchur_Private(A)); 1637 for (j = 0; j < nrhs; ++j) 1638 for (i = 0; i < mumps->id.size_schur; ++i) { 1639 #if !defined(PETSC_USE_COMPLEX) 1640 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1641 #else 1642 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1643 #endif 1644 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1645 } 1646 } 1647 if (!denseB) { /* sparse B */ 1648 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1649 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1650 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1651 } 1652 PetscCall(MatDenseRestoreArray(X, &array)); 1653 PetscFunctionReturn(PETSC_SUCCESS); 1654 } 1655 1656 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
*/ 1657 PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1658 1659 /* create msol_loc to hold mumps local solution */ 1660 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1661 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1662 1663 lsol_loc = mumps->id.lsol_loc; 1664 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1665 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1666 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1667 mumps->id.isol_loc = isol_loc; 1668 1669 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1670 1671 if (denseB) { 1672 if (mumps->ICNTL20 == 10) { 1673 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1674 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1675 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1676 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1677 PetscCall(MatGetLocalSize(B, &m, NULL)); 1678 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1679 } else { 1680 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1681 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1682 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1683 0, re-arrange B into desired order, which is a local operation. 1684 */ 1685 1686 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1687 /* wrap dense rhs matrix B into a vector v_mpi */ 1688 PetscCall(MatGetLocalSize(B, &m, NULL)); 1689 PetscCall(MatDenseGetArray(B, &bray)); 1690 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1691 PetscCall(MatDenseRestoreArray(B, &bray)); 1692 1693 /* scatter v_mpi to b_seq in proc[0]. 
MUMPS requires rhs to be centralized on the host! */ 1694 if (!mumps->myid) { 1695 PetscInt *idx; 1696 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1697 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1698 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1699 k = 0; 1700 for (proc = 0; proc < mumps->petsc_size; proc++) { 1701 for (j = 0; j < nrhs; j++) { 1702 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1703 } 1704 } 1705 1706 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1707 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1708 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1709 } else { 1710 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1711 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1712 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1713 } 1714 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1715 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1716 PetscCall(ISDestroy(&is_to)); 1717 PetscCall(ISDestroy(&is_from)); 1718 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1719 1720 if (!mumps->myid) { /* define rhs on the host */ 1721 PetscCall(VecGetArray(b_seq, &bray)); 1722 mumps->id.rhs = (MumpsScalar *)bray; 1723 PetscCall(VecRestoreArray(b_seq, &bray)); 1724 } 1725 } 1726 } else { /* sparse B */ 1727 b = (Mat_MPIAIJ *)Bt->data; 1728 1729 /* wrap dense X into a vector v_mpi */ 1730 PetscCall(MatGetLocalSize(X, &m, NULL)); 1731 PetscCall(MatDenseGetArray(X, &bray)); 1732 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1733 PetscCall(MatDenseRestoreArray(X, &bray)); 1734 1735 if (!mumps->myid) { 1736 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1737 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const 
PetscInt **)&ja, &flg)); 1738 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1739 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1740 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1741 } else { 1742 mumps->id.irhs_ptr = NULL; 1743 mumps->id.irhs_sparse = NULL; 1744 mumps->id.nz_rhs = 0; 1745 mumps->id.rhs_sparse = NULL; 1746 } 1747 } 1748 1749 /* solve phase */ 1750 mumps->id.job = JOB_SOLVE; 1751 PetscMUMPS_c(mumps); 1752 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1753 1754 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1755 PetscCall(MatDenseGetArray(X, &array)); 1756 PetscCall(VecPlaceArray(v_mpi, array)); 1757 1758 /* create scatter scat_sol */ 1759 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1760 /* iidx: index for scatter mumps solution to petsc X */ 1761 1762 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1763 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1764 for (i = 0; i < lsol_loc; i++) { 1765 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1766 1767 for (proc = 0; proc < mumps->petsc_size; proc++) { 1768 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1769 myrstart = rstart[proc]; 1770 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1771 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1772 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1773 break; 1774 } 1775 } 1776 1777 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1778 } 1779 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1780 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1781 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1782 PetscCall(ISDestroy(&is_from)); 1783 PetscCall(ISDestroy(&is_to)); 1784 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1785 PetscCall(MatDenseRestoreArray(X, &array)); 1786 1787 /* free spaces */ 1788 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1789 mumps->id.isol_loc = isol_loc_save; 1790 1791 PetscCall(PetscFree2(sol_loc, isol_loc)); 1792 PetscCall(PetscFree(idxx)); 1793 PetscCall(VecDestroy(&msol_loc)); 1794 PetscCall(VecDestroy(&v_mpi)); 1795 if (!denseB) { 1796 if (!mumps->myid) { 1797 b = (Mat_MPIAIJ *)Bt->data; 1798 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1799 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1800 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1801 } 1802 } else { 1803 if (mumps->ICNTL20 == 0) { 1804 PetscCall(VecDestroy(&b_seq)); 1805 PetscCall(VecScatterDestroy(&scat_rhs)); 1806 } 1807 } 1808 PetscCall(VecScatterDestroy(&scat_sol)); 1809 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1810 PetscFunctionReturn(PETSC_SUCCESS); 1811 } 1812 1813 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1814 { 1815 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1816 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1817 1818 PetscFunctionBegin; 1819 mumps->id.ICNTL(9) = 0; 1820 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1821 mumps->id.ICNTL(9) = value; 1822 PetscFunctionReturn(PETSC_SUCCESS); 1823 } 1824 1825 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1826 { 1827 PetscBool flg; 1828 Mat B; 1829 1830 PetscFunctionBegin; 1831 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1832 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1833 1834 /* Create B=Bt^T that uses Bt's data structure */ 1835 PetscCall(MatCreateTranspose(Bt, &B)); 1836 1837 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1838 PetscCall(MatDestroy(&B)); 1839 PetscFunctionReturn(PETSC_SUCCESS); 1840 } 1841 1842 #if !defined(PETSC_USE_COMPLEX) 1843 /* 1844 input: 1845 F: numeric factor 1846 output: 1847 nneg: total number of negative pivots 1848 nzero: total number of zero pivots 1849 npos: (global dimension of F) - nneg - nzero 1850 */ 1851 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1852 { 1853 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1854 PetscMPIInt size; 1855 1856 PetscFunctionBegin; 1857 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1858 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1859 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1860 1861 if (nneg) *nneg = mumps->id.INFOG(12); 1862 if (nzero || npos) { 1863 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1864 if (nzero) *nzero = mumps->id.INFOG(28); 1865 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1866 } 1867 PetscFunctionReturn(PETSC_SUCCESS); 1868 } 1869 #endif 1870 1871 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1872 { 1873 PetscInt i, nreqs; 1874 PetscMUMPSInt *irn, *jcn; 1875 PetscMPIInt count; 1876 PetscInt64 totnnz, remain; 1877 const PetscInt osize = mumps->omp_comm_size; 1878 PetscScalar *val; 1879 1880 PetscFunctionBegin; 1881 if (osize > 1) { 1882 if (reuse == MAT_INITIAL_MATRIX) { 1883 /* master first gathers counts of nonzeros to receive */ 1884 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1885 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1886 1887 /* Then each computes number of send/recvs */ 1888 if (mumps->is_omp_master) { 1889 /* Start from 1 since self communication is not done in MPI */ 1890 nreqs = 0; 1891 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1892 } else { 1893 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1894 } 1895 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1896 1897 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1898 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1899 might be a prime number > 2^31-1, we have to slice the message. 
Note omp_comm_size 1900 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1901 */ 1902 nreqs = 0; /* counter for actual send/recvs */ 1903 if (mumps->is_omp_master) { 1904 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1905 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1906 PetscCall(PetscMalloc1(totnnz, &val)); 1907 1908 /* Self communication */ 1909 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1910 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1911 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1912 1913 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1914 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1915 PetscCall(PetscFree(mumps->val_alloc)); 1916 mumps->nnz = totnnz; 1917 mumps->irn = irn; 1918 mumps->jcn = jcn; 1919 mumps->val = mumps->val_alloc = val; 1920 1921 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1922 jcn += mumps->recvcount[0]; 1923 val += mumps->recvcount[0]; 1924 1925 /* Remote communication */ 1926 for (i = 1; i < osize; i++) { 1927 count = PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 1928 remain = mumps->recvcount[i] - count; 1929 while (count > 0) { 1930 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1931 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1932 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1933 irn += count; 1934 jcn += count; 1935 val += count; 1936 count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1937 remain -= count; 1938 } 1939 } 1940 } else { 1941 irn = mumps->irn; 1942 jcn = mumps->jcn; 1943 val = mumps->val; 1944 count = PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 1945 remain = mumps->nnz - count; 1946 while 
(count > 0) { 1947 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1948 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1949 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1950 irn += count; 1951 jcn += count; 1952 val += count; 1953 count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1954 remain -= count; 1955 } 1956 } 1957 } else { 1958 nreqs = 0; 1959 if (mumps->is_omp_master) { 1960 val = mumps->val + mumps->recvcount[0]; 1961 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1962 count = PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 1963 remain = mumps->recvcount[i] - count; 1964 while (count > 0) { 1965 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1966 val += count; 1967 count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1968 remain -= count; 1969 } 1970 } 1971 } else { 1972 val = mumps->val; 1973 count = PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 1974 remain = mumps->nnz - count; 1975 while (count > 0) { 1976 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1977 val += count; 1978 count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1979 remain -= count; 1980 } 1981 } 1982 } 1983 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1984 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1985 } 1986 PetscFunctionReturn(PETSC_SUCCESS); 1987 } 1988 1989 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info) 1990 { 1991 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1992 PetscBool isMPIAIJ; 1993 1994 PetscFunctionBegin; 1995 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && 
mumps->id.INFOG(1) == 0)) { 1996 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1997 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1998 PetscFunctionReturn(PETSC_SUCCESS); 1999 } 2000 2001 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 2002 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2003 2004 /* numerical factorization phase */ 2005 mumps->id.job = JOB_FACTNUMERIC; 2006 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2007 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 2008 } else { 2009 mumps->id.a_loc = (MumpsScalar *)mumps->val; 2010 } 2011 PetscMUMPS_c(mumps); 2012 if (mumps->id.INFOG(1) < 0) { 2013 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2014 if (mumps->id.INFOG(1) == -10) { 2015 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2016 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2017 } else if (mumps->id.INFOG(1) == -13) { 2018 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2019 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2020 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2021 PetscCall(PetscInfo(F, "MUMPS error in numerical factorizatione: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2022 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2023 } else { 2024 
PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2025 F->factorerrortype = MAT_FACTOR_OTHER; 2026 } 2027 } 2028 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 2029 2030 F->assembled = PETSC_TRUE; 2031 2032 if (F->schur) { /* reset Schur status to unfactored */ 2033 #if defined(PETSC_HAVE_CUDA) 2034 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 2035 #endif 2036 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2037 mumps->id.ICNTL(19) = 2; 2038 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2039 } 2040 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2041 } 2042 2043 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2044 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2045 2046 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2047 if (mumps->petsc_size > 1) { 2048 PetscInt lsol_loc; 2049 PetscScalar *sol_loc; 2050 2051 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 2052 2053 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2054 if (mumps->x_seq) { 2055 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2056 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2057 PetscCall(VecDestroy(&mumps->x_seq)); 2058 } 2059 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2060 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2061 mumps->id.lsol_loc = lsol_loc; 2062 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2063 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2064 } 2065 PetscCall(PetscLogFlops((double)mumps->id.RINFO(2))); 2066 PetscFunctionReturn(PETSC_SUCCESS); 2067 } 2068 2069 /* Sets MUMPS 
options from the options database */ 2070 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2071 { 2072 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2073 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2074 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2075 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2076 MumpsScalar *arr; 2077 2078 PetscFunctionBegin; 2079 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2080 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2081 PetscInt nthreads = 0; 2082 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2083 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2084 2085 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2086 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2087 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2088 2089 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2090 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2091 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2092 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2093 if (mumps->use_petsc_omp_support) { 2094 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2095 ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 2096 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2097 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2098 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2099 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2100 #endif 2101 } else { 2102 mumps->omp_comm = PETSC_COMM_SELF; 2103 mumps->mumps_comm = mumps->petsc_comm; 2104 mumps->is_omp_master = PETSC_TRUE; 2105 } 2106 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2107 mumps->reqs = NULL; 2108 mumps->tag = 0; 2109 2110 if (mumps->mumps_comm != MPI_COMM_NULL) { 2111 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2112 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2113 MPI_Comm comm; 2114 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2115 mumps->mumps_comm = comm; 2116 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2117 } 2118 2119 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2120 mumps->id.job = JOB_INIT; 2121 mumps->id.par = 1; /* host participates factorizaton and solve */ 2122 mumps->id.sym = mumps->sym; 2123 2124 size = mumps->id.size_schur; 2125 arr = mumps->id.schur; 2126 listvar_schur = mumps->id.listvar_schur; 2127 PetscMUMPS_c(mumps); 2128 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2129 /* restore cached ICNTL and CNTL values */ 2130 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2131 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2132 
PetscCall(PetscFree(mumps->ICNTL_pre)); 2133 PetscCall(PetscFree(mumps->CNTL_pre)); 2134 2135 if (schur) { 2136 mumps->id.size_schur = size; 2137 mumps->id.schur_lld = size; 2138 mumps->id.schur = arr; 2139 mumps->id.listvar_schur = listvar_schur; 2140 if (mumps->petsc_size > 1) { 2141 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2142 2143 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2144 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2145 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2146 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2147 } else { 2148 if (F->factortype == MAT_FACTOR_LU) { 2149 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2150 } else { 2151 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2152 } 2153 } 2154 mumps->id.ICNTL(26) = -1; 2155 } 2156 2157 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2158 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
2159 */ 2160 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2161 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2162 2163 mumps->scat_rhs = NULL; 2164 mumps->scat_sol = NULL; 2165 2166 /* set PETSc-MUMPS default options - override MUMPS default */ 2167 mumps->id.ICNTL(3) = 0; 2168 mumps->id.ICNTL(4) = 0; 2169 if (mumps->petsc_size == 1) { 2170 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2171 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2172 } else { 2173 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2174 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2175 } 2176 } 2177 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2178 if (flg) mumps->id.ICNTL(1) = icntl; 2179 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2180 if (flg) mumps->id.ICNTL(2) = icntl; 2181 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2182 if (flg) mumps->id.ICNTL(3) = icntl; 2183 2184 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2185 if (flg) mumps->id.ICNTL(4) = icntl; 2186 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2187 2188 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2189 if (flg) mumps->id.ICNTL(6) = icntl; 2190 2191 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential 
analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2192 if (flg) { 2193 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2194 mumps->id.ICNTL(7) = icntl; 2195 } 2196 2197 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2198 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2199 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2200 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2201 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2202 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2203 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2204 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2205 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2206 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2207 if (flg) { 2208 PetscCheck(mumps->id.ICNTL(15) <= 0, 
PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2209 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2210 } 2211 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2212 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2213 PetscCall(MatDestroy(&F->schur)); 2214 PetscCall(MatMumpsResetSchur_Private(mumps)); 2215 } 2216 2217 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2218 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2219 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2220 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2221 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 
2222 In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 2223 */ 2224 #if PETSC_PKG_MUMPS_VERSION_GE(5, 6, 2) && defined(PETSC_HAVE_MUMPS_AVOID_MPI_IN_PLACE) 2225 mumps->ICNTL20 = 10; 2226 #elif PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2227 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 2228 #else 2229 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 2230 #endif 2231 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2232 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2233 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2234 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2235 #endif 2236 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2237 2238 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2239 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2240 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2241 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the 
correct detection of null pivots */ } 2242 2243 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2244 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2245 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2246 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2247 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2248 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2249 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2250 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2251 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2252 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), 
&mumps->id.ICNTL(35), NULL)); 2253 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2254 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2255 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2256 2257 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2258 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2259 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2260 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2261 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2262 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2263 2264 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2265 2266 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2267 if (ninfo) { 2268 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2269 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2270 mumps->ninfo = ninfo; 2271 for (i = 0; i < 
ninfo; i++) { 2272 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2273 mumps->info[i] = info[i]; 2274 } 2275 } 2276 PetscOptionsEnd(); 2277 PetscFunctionReturn(PETSC_SUCCESS); 2278 } 2279 2280 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2281 { 2282 PetscFunctionBegin; 2283 if (mumps->id.INFOG(1) < 0) { 2284 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2285 if (mumps->id.INFOG(1) == -6) { 2286 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2287 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2288 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2289 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2290 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2291 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2292 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2293 } else { 2294 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2295 F->factorerrortype = MAT_FACTOR_OTHER; 2296 } 2297 } 2298 PetscFunctionReturn(PETSC_SUCCESS); 2299 } 2300 2301 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2302 { 2303 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2304 Vec b; 2305 const PetscInt M = A->rmap->N; 2306 2307 PetscFunctionBegin; 2308 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2309 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2310 PetscFunctionReturn(PETSC_SUCCESS); 2311 } 2312 2313 /* Set MUMPS 
options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  /* Convert A into the triple (COO) format MUMPS consumes; gather on rank 0 when centralized input (ICNTL(18)=0) is used */
  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n = M;
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input: only the host (rank 0) supplies the triples */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
      if (r) {
        /* user-supplied row permutation: ICNTL(7)=1 tells MUMPS to use perm_in */
        mumps->id.ICNTL(7) = 1;
        if (!mumps->myid) {
          const PetscInt *idx;
          PetscInt i;

          PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
          PetscCall(ISGetIndices(r, &idx));
          for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
          PetscCall(ISRestoreIndices(r, &idx));
        }
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1): each rank supplies its local triples */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps);
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
  F->ops->solve             = MatSolve_MUMPS;
  F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
  F->ops->matsolve          = MatMatSolve_MUMPS;
  F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
  F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic LU factorization (MUMPS analysis phase) for BAIJ matrices.
   Note the Petsc r and c permutations are ignored */
static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
{
  Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n = M;
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps);
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  /* note: unlike the AIJ variant, no matsolve/mattransposesolve entries are installed here */
  F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
  F->ops->solve             = MatSolve_MUMPS;
  F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
  F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic Cholesky factorization (MUMPS analysis phase with a symmetric id.sym).
   Note the Petsc r permutation and factor info are ignored */
static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
{
  Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n = M;
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6) > 1)
mumps->id.a = (MumpsScalar *)mumps->val;
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps);
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS; /* factor is symmetric, so transpose solve reuses the plain solve */
  F->ops->matsolve              = MatMatSolve_MUMPS;
  F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
  F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = NULL; /* inertia is only meaningful for real symmetric factorizations */
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Print the MUMPS control parameters (ICNTL/CNTL) and, per format, the local
   (INFO/RINFO) and global (INFOG/RINFOG) statistics to an ASCII viewer. */
static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym));
        PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11)));
        if (mumps->id.ICNTL(11) > 0) {
          /* error-analysis statistics are only valid when ICNTL(11) requested them */
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)mumps->id.RINFOG(4)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)mumps->id.RINFOG(5)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)mumps->id.RINFOG(6)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)mumps->id.RINFOG(7), (double)mumps->id.RINFOG(8)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", (double)mumps->id.RINFOG(9)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)mumps->id.RINFOG(10), (double)mumps->id.RINFOG(11)));
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15)));
        /* ICNTL(15-17) not used */
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)mumps->id.CNTL(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)mumps->id.CNTL(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)mumps->id.CNTL(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)mumps->id.CNTL(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)mumps->id.CNTL(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)mumps->id.CNTL(7)));

        /* information local to each processor */
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n"));
        PetscCall(PetscViewerASCIIPushSynchronized(viewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(1)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(2)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(3)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
        PetscCall(PetscViewerFlush(viewer));

        if (mumps->ninfo && mumps->ninfo <= 80) {
          /* user-requested INFO entries (set via -mat_mumps_view_info) */
          PetscInt i;
          for (i = 0; i < mumps->ninfo; i++) {
            PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
            PetscCall(PetscViewerFlush(viewer));
          }
        }
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));

      if (mumps->myid == 0) { /* information from the host */
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)mumps->id.RINFOG(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)mumps->id.RINFOG(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)mumps->id.RINFOG(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)mumps->id.RINFOG(12), (double)mumps->id.RINFOG(13), mumps->id.INFOG(34)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report factor statistics through the generic MatGetInfo() interface.
   NOTE(review): a negative INFOG(20) apparently encodes the count in millions,
   hence the -1000000 scaling — confirm against the MUMPS users' guide. */
static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  info->block_size = 1.0;
  info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_used = mumps->id.INFOG(20) >= 0 ?
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_unneeded = 0.0;
  info->assemblies = 0.0;
  info->mallocs = 0.0;
  info->memory = 0.0;
  info->fill_ratio_given = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Select the Schur complement variables: attach a size x size SEQDENSE matrix
   F->schur whose array MUMPS fills during factorization, and store the
   (1-based) variable indices in id.listvar_schur. */
static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  mumps->id.schur = (MumpsScalar *)arr; /* MUMPS writes directly into the dense matrix storage */
  mumps->id.size_schur = size;
  mumps->id.schur_lld = size;
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

  /* MUMPS expects Fortran style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the MUMPS-computed Schur complement into a new SEQDENSE matrix *S,
   expanding the ICNTL(19) storage variants (by rows / by columns / lower
   triangular) into a full column-major array. Caller owns *S. */
static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! 
Call MatFactorSetSchurIS() to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) { /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows: transpose into the column-major array */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[j * N + i] = val;
        }
      }
    } else { /* stored by columns */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns: mirror into both triangles */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = array[j * N + i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = array[j * N + i] = val;
        }
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set ICNTL(icntl) = ival. Before MUMPS is initialized (id.job == JOB_NULL)
   the pair is cached in ICNTL_pre — layout [count, icntl_1, ival_1, icntl_2,
   ival_2, ...] — and applied once PetscMUMPS_c() runs; afterwards the value is
   written directly into the MUMPS control array. */
static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) { /* not already cached: grow the cache by one (icntl,ival) slot */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Get ICNTL(icntl): read from the pre-initialization cache (0 if not cached)
   or, once MUMPS is initialized, directly from the control array. */
static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 2775 *ival = 0; 2776 for (i = 0; i < nICNTL_pre; ++i) { 2777 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2778 } 2779 } else *ival = mumps->id.ICNTL(icntl); 2780 PetscFunctionReturn(PETSC_SUCCESS); 2781 } 2782 2783 /*@ 2784 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 2785 2786 Logically Collective 2787 2788 Input Parameters: 2789 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2790 . icntl - index of MUMPS parameter array ICNTL() 2791 - ival - value of MUMPS ICNTL(icntl) 2792 2793 Options Database Key: 2794 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2795 2796 Level: beginner 2797 2798 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2799 @*/ 2800 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2801 { 2802 PetscFunctionBegin; 2803 PetscValidType(F, 1); 2804 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2805 PetscValidLogicalCollectiveInt(F, icntl, 2); 2806 PetscValidLogicalCollectiveInt(F, ival, 3); 2807 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2808 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2809 PetscFunctionReturn(PETSC_SUCCESS); 2810 } 2811 2812 /*@ 2813 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 2814 2815 Logically Collective 2816 2817 Input Parameters: 2818 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2819 - icntl - index of MUMPS parameter array ICNTL() 2820 2821 Output Parameter: 2822 . 
ival - value of MUMPS ICNTL(icntl) 2823 2824 Level: beginner 2825 2826 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2827 @*/ 2828 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2829 { 2830 PetscFunctionBegin; 2831 PetscValidType(F, 1); 2832 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2833 PetscValidLogicalCollectiveInt(F, icntl, 2); 2834 PetscAssertPointer(ival, 3); 2835 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2836 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2837 PetscFunctionReturn(PETSC_SUCCESS); 2838 } 2839 2840 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2841 { 2842 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2843 2844 PetscFunctionBegin; 2845 if (mumps->id.job == JOB_NULL) { 2846 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2847 for (i = 0; i < nCNTL_pre; ++i) 2848 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2849 if (i == nCNTL_pre) { 2850 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2851 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2852 mumps->CNTL_pre[0]++; 2853 } 2854 mumps->CNTL_pre[1 + 2 * i] = icntl; 2855 mumps->CNTL_pre[2 + 2 * i] = val; 2856 } else mumps->id.CNTL(icntl) = val; 2857 PetscFunctionReturn(PETSC_SUCCESS); 2858 } 2859 2860 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2861 { 2862 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2863 2864 PetscFunctionBegin; 2865 if (mumps->id.job == JOB_NULL) { 2866 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 2867 *val = 0.0; 2868 for (i = 0; i < nCNTL_pre; ++i) { 2869 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2870 } 2871 } else *val = mumps->id.CNTL(icntl); 2872 PetscFunctionReturn(PETSC_SUCCESS); 2873 } 2874 2875 /*@ 2876 MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 2877 2878 Logically Collective 2879 2880 Input Parameters: 2881 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2882 . icntl - index of MUMPS parameter array CNTL() 2883 - val - value of MUMPS CNTL(icntl) 2884 2885 Options Database Key: 2886 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2887 2888 Level: beginner 2889 2890 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2891 @*/ 2892 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2893 { 2894 PetscFunctionBegin; 2895 PetscValidType(F, 1); 2896 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2897 PetscValidLogicalCollectiveInt(F, icntl, 2); 2898 PetscValidLogicalCollectiveReal(F, val, 3); 2899 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2900 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2901 PetscFunctionReturn(PETSC_SUCCESS); 2902 } 2903 2904 /*@ 2905 MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 2906 2907 Logically Collective 2908 2909 Input Parameters: 2910 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2911 - icntl - index of MUMPS parameter array CNTL() 2912 2913 Output Parameter: 2914 . 
val - value of MUMPS CNTL(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read INFO(icntl) straight from the MUMPS structure (per-process information) */
static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read INFOG(icntl) straight from the MUMPS structure (global information) */
static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read RINFO(icntl) straight from the MUMPS structure (per-process real statistics) */
static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfo = mumps->id.RINFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read RINFOG(icntl) straight from the MUMPS structure (global real statistics) */
static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfog = mumps->id.RINFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return (on the host rank only) the 0-based indices of the null-pivot rows found during factorization;
   requires null-pivot detection to have been enabled with ICNTL(24)=1. The caller frees *array. */
static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
  *size  = 0;
  *array = NULL;
  if (!mumps->myid) { /* only the host rank holds pivnul_list; shift from MUMPS' 1-based to 0-based indices */
    *size = mumps->id.INFOG(28);
    PetscCall(PetscMalloc1(*size, array));
    for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute selected entries of inv(A) (MUMPS ICNTL(30)=1 feature). The requested pattern is passed as the
   sparse matrix underlying the MATTRANSPOSEVIRTUAL spRHS; MUMPS overwrites its values with entries of inv(A). */
static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
{
  Mat          Bt = NULL, Btseq = NULL;
  PetscBool    flg;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *aa;
  PetscInt     spnr, *ia, *ja, M, nrhs;

  PetscFunctionBegin;
  PetscAssertPointer(spRHS, 2);
  PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
  if (flg) {
    PetscCall(MatTransposeGetMat(spRHS, &Bt));
  } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");

  PetscCall(MatMumpsSetIcntl(F, 30, 1));

  if (mumps->petsc_size > 1) { /* in parallel, MUMPS takes the sparse RHS from the host's local diagonal block */
    Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
    Btseq = b->A;
  } else {
    Btseq = Bt;
  }

  PetscCall(MatGetSize(spRHS, &M, &nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL; /* dense RHS unused in sparse-RHS mode */

  if (!mumps->myid) { /* the host feeds MUMPS the CSR structure (1-based) of the sparse RHS */
    PetscCall(MatSeqAIJGetArray(Btseq, &aa));
    PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
    PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
    mumps->id.rhs_sparse = (MumpsScalar *)aa;
  } else {
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    mumps->id.rhs_sparse  = NULL;
  }
  mumps->id.ICNTL(20) = 1; /* rhs is sparse */
  mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  if (!mumps->myid) {
    PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
    PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameter:
. F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface

  Output Parameter:
. spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
@*/
PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Same as MatMumpsGetInverse_MUMPS but the pattern is given directly as an AIJ matrix;
   wrap it in a transient MATTRANSPOSEVIRTUAL and delegate */
static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
{
  Mat spRHS;

  PetscFunctionBegin;
  PetscCall(MatCreateTranspose(spRHST, &spRHS));
  PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
  PetscCall(MatDestroy(&spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameter:
. F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface

  Output Parameter:
. spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
@*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
{
  PetscBool flg;

  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");

  PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array INFO()

  Output Parameter:
. ival - value of MUMPS INFO(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array INFOG()

  Output Parameter:
. ival - value of MUMPS INFOG(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array RINFO()

  Output Parameter:
. val - value of MUMPS RINFO(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array RINFOG()

  Output Parameter:
. val - value of MUMPS RINFOG(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameter:
.
F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface

  Output Parameters:
+ size  - local size of the array. The size of the array is non-zero only on the host.
- array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
          for freeing this array.

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
@*/
PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(size, 2);
  PetscAssertPointer(array, 3);
  PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
  distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>

  Works with `MATAIJ` and `MATSBAIJ` matrices

  Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

  Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
  See details below.

  Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver

  Options Database Keys:
+ -mat_mumps_icntl_1   - ICNTL(1): output stream for error messages
. -mat_mumps_icntl_2   - ICNTL(2): output stream for diagnostic printing, statistics, and warning
. -mat_mumps_icntl_3   - ICNTL(3): output stream for global information, collected on the host
. -mat_mumps_icntl_4   - ICNTL(4): level of printing (0 to 4)
. -mat_mumps_icntl_6   - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
. -mat_mumps_icntl_7   - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
. -mat_mumps_icntl_8   - ICNTL(8): scaling strategy (-2 to 8 or 77)
. -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
. -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
. -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
. -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
. -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
. -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
. -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
. -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
. -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
. -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can allocate per processor
. -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
. -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
. -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix
. -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering
. -mat_mumps_icntl_29  - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
. -mat_mumps_icntl_30  - ICNTL(30): compute user-specified set of entries in inv(A)
. -mat_mumps_icntl_31  - ICNTL(31): indicates which factors may be discarded during factorization
. -mat_mumps_icntl_33  - ICNTL(33): compute determinant
. -mat_mumps_icntl_35  - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
. -mat_mumps_icntl_36  - ICNTL(36): controls the choice of BLR factorization variant
. -mat_mumps_icntl_38  - ICNTL(38): sets the estimated compression rate of LU factors with BLR
. -mat_mumps_icntl_58  - ICNTL(58): options for symbolic factorization
. -mat_mumps_cntl_1    - CNTL(1): relative pivoting threshold
. -mat_mumps_cntl_2    - CNTL(2): stopping criterion of refinement
. -mat_mumps_cntl_3    - CNTL(3): absolute pivoting threshold
. -mat_mumps_cntl_4    - CNTL(4): value for static pivoting
. -mat_mumps_cntl_5    - CNTL(5): fixation for null pivots
. -mat_mumps_cntl_7    - CNTL(7): precision of the dropping parameter used during BLR factorization
- -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
                                   Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

  Level: beginner

  Notes:
  MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
  error if the matrix is Hermitian.

  When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
  `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

  When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
  the failure with
.vb
          KSPGetPC(ksp,&pc);
          PCFactorGetMatrix(pc,&mat);
          MatMumpsGetInfo(mat,....);
          MatMumpsGetInfog(mat,....); etc.
.ve
  Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.

  MUMPS provides 64-bit integer support in two build modes:
  full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
  requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

  selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
  MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
  columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
  integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

  Two modes to run MUMPS/PETSc with OpenMP
.vb
   Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
   threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
.ve

.vb
   -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
   if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
.ve

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
  (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
  (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
  libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
  (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

  If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
  processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
  size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
  are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
  by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
  In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
  if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
  MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
  cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
  problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
  For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
  examine the mapping result.

  PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
  for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
  calls `omp_set_num_threads`(m) internally before calling MUMPS.

  See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`

.seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
M*/

/* Report this factorization's solver package name (MATSOLVERMUMPS) */
static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetFactor for Seq and MPI AIJ matrices */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqAIJ, isDiag, isDense;
  PetscMPIInt size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* MUMPS Cholesky cannot factor Hermitian (non-symmetric) complex matrices; decline by returning a NULL factor */
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif
  /* Create the factorization matrix */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/control methods on the factor matrix */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric factorization */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2; /* complex: general symmetric (SYM=2); SPD shortcut (SYM=1) not applicable */
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) { /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed
into symbolic factorization */ 3442 B->canuseordering = PETSC_TRUE; 3443 } 3444 B->ops->destroy = MatDestroy_MUMPS; 3445 B->data = (void *)mumps; 3446 3447 *F = B; 3448 mumps->id.job = JOB_NULL; 3449 mumps->ICNTL_pre = NULL; 3450 mumps->CNTL_pre = NULL; 3451 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3452 PetscFunctionReturn(PETSC_SUCCESS); 3453 } 3454 3455 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3456 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 3457 { 3458 Mat B; 3459 Mat_MUMPS *mumps; 3460 PetscBool isSeqSBAIJ; 3461 PetscMPIInt size; 3462 3463 PetscFunctionBegin; 3464 #if defined(PETSC_USE_COMPLEX) 3465 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3466 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3467 *F = NULL; 3468 PetscFunctionReturn(PETSC_SUCCESS); 3469 } 3470 #endif 3471 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3472 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3473 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3474 PetscCall(MatSetUp(B)); 3475 3476 PetscCall(PetscNew(&mumps)); 3477 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3478 if (isSeqSBAIJ) { 3479 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3480 } else { 3481 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3482 } 3483 3484 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3485 B->ops->view = MatView_MUMPS; 3486 B->ops->getinfo = MatGetInfo_MUMPS; 3487 3488 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3489 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3490 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3491 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3492 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3493 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3494 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3495 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3496 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3497 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3498 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3499 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3500 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3501 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3502 3503 B->factortype = MAT_FACTOR_CHOLESKY; 3504 #if defined(PETSC_USE_COMPLEX) 3505 mumps->sym = 2; 3506 #else 3507 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3508 else mumps->sym = 2; 3509 #endif 3510 3511 /* set solvertype */ 3512 PetscCall(PetscFree(B->solvertype)); 3513 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3514 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3515 if (size == 1) { 3516 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3517 B->canuseordering = PETSC_TRUE; 3518 } 3519 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3520 
B->ops->destroy = MatDestroy_MUMPS; 3521 B->data = (void *)mumps; 3522 3523 *F = B; 3524 mumps->id.job = JOB_NULL; 3525 mumps->ICNTL_pre = NULL; 3526 mumps->CNTL_pre = NULL; 3527 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3528 PetscFunctionReturn(PETSC_SUCCESS); 3529 } 3530 3531 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3532 { 3533 Mat B; 3534 Mat_MUMPS *mumps; 3535 PetscBool isSeqBAIJ; 3536 PetscMPIInt size; 3537 3538 PetscFunctionBegin; 3539 /* Create the factorization matrix */ 3540 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3541 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3542 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3543 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3544 PetscCall(MatSetUp(B)); 3545 3546 PetscCall(PetscNew(&mumps)); 3547 if (ftype == MAT_FACTOR_LU) { 3548 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3549 B->factortype = MAT_FACTOR_LU; 3550 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3551 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3552 mumps->sym = 0; 3553 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3554 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3555 3556 B->ops->view = MatView_MUMPS; 3557 B->ops->getinfo = MatGetInfo_MUMPS; 3558 3559 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3560 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3561 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3562 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", 
MatMumpsSetIcntl_MUMPS)); 3563 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3564 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3565 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3566 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3567 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3568 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3569 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3570 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3571 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3572 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3573 3574 /* set solvertype */ 3575 PetscCall(PetscFree(B->solvertype)); 3576 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3577 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3578 if (size == 1) { 3579 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3580 B->canuseordering = PETSC_TRUE; 3581 } 3582 B->ops->destroy = MatDestroy_MUMPS; 3583 B->data = (void *)mumps; 3584 3585 *F = B; 3586 mumps->id.job = JOB_NULL; 3587 mumps->ICNTL_pre = NULL; 3588 mumps->CNTL_pre = NULL; 3589 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3590 PetscFunctionReturn(PETSC_SUCCESS); 3591 } 3592 3593 /* MatGetFactor for Seq and MPI SELL matrices */ 3594 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3595 { 3596 Mat B; 3597 Mat_MUMPS *mumps; 3598 PetscBool 
isSeqSELL; 3599 PetscMPIInt size; 3600 3601 PetscFunctionBegin; 3602 /* Create the factorization matrix */ 3603 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3604 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3605 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3606 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3607 PetscCall(MatSetUp(B)); 3608 3609 PetscCall(PetscNew(&mumps)); 3610 3611 B->ops->view = MatView_MUMPS; 3612 B->ops->getinfo = MatGetInfo_MUMPS; 3613 3614 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3615 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3616 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3617 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3618 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3619 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3620 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3621 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3622 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3623 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3624 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3625 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3626 3627 if (ftype == MAT_FACTOR_LU) { 3628 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3629 
B->factortype = MAT_FACTOR_LU; 3630 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3631 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3632 mumps->sym = 0; 3633 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3634 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3635 3636 /* set solvertype */ 3637 PetscCall(PetscFree(B->solvertype)); 3638 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3639 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3640 if (size == 1) { 3641 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3642 B->canuseordering = PETSC_TRUE; 3643 } 3644 B->ops->destroy = MatDestroy_MUMPS; 3645 B->data = (void *)mumps; 3646 3647 *F = B; 3648 mumps->id.job = JOB_NULL; 3649 mumps->ICNTL_pre = NULL; 3650 mumps->CNTL_pre = NULL; 3651 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3652 PetscFunctionReturn(PETSC_SUCCESS); 3653 } 3654 3655 /* MatGetFactor for MATNEST matrices */ 3656 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3657 { 3658 Mat B, **mats; 3659 Mat_MUMPS *mumps; 3660 PetscInt nr, nc; 3661 PetscMPIInt size; 3662 PetscBool flg = PETSC_TRUE; 3663 3664 PetscFunctionBegin; 3665 #if defined(PETSC_USE_COMPLEX) 3666 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3667 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3668 *F = NULL; 3669 PetscFunctionReturn(PETSC_SUCCESS); 3670 } 3671 #endif 3672 3673 /* Return if some condition is not satisfied */ 3674 *F = NULL; 3675 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3676 if (ftype == MAT_FACTOR_CHOLESKY) { 3677 IS *rows, *cols; 3678 PetscInt *m, *M; 3679 3680 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3681 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3682 PetscCall(MatNestGetISs(A, rows, cols)); 3683 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3684 if (!flg) { 3685 PetscCall(PetscFree2(rows, cols)); 3686 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 3687 PetscFunctionReturn(PETSC_SUCCESS); 3688 } 3689 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3690 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3691 for (PetscInt r = 0; flg && r < nr; r++) 3692 for (PetscInt k = r + 1; flg && k < nr; k++) 3693 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3694 PetscCall(PetscFree2(m, M)); 3695 PetscCall(PetscFree2(rows, cols)); 3696 if (!flg) { 3697 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. 
Use MAT_FACTOR_LU.\n")); 3698 PetscFunctionReturn(PETSC_SUCCESS); 3699 } 3700 } 3701 3702 for (PetscInt r = 0; r < nr; r++) { 3703 for (PetscInt c = 0; c < nc; c++) { 3704 Mat sub = mats[r][c]; 3705 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag, isDense; 3706 3707 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3708 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 3709 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 3710 else { 3711 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 3712 if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 3713 } 3714 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3715 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3716 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3717 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3718 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3719 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3720 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 3721 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3722 if (ftype == MAT_FACTOR_CHOLESKY) { 3723 if (r == c) { 3724 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) { 3725 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3726 flg = PETSC_FALSE; 3727 } 3728 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 3729 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3730 flg = 
PETSC_FALSE; 3731 } 3732 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 3733 PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3734 flg = PETSC_FALSE; 3735 } 3736 } 3737 } 3738 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3739 3740 /* Create the factorization matrix */ 3741 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3742 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3743 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3744 PetscCall(MatSetUp(B)); 3745 3746 PetscCall(PetscNew(&mumps)); 3747 3748 B->ops->view = MatView_MUMPS; 3749 B->ops->getinfo = MatGetInfo_MUMPS; 3750 3751 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3752 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3753 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3754 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3755 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3756 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3757 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3758 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3759 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3760 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3761 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3762 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3763 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3764 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3765 3766 if (ftype == MAT_FACTOR_LU) { 3767 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3768 B->factortype = MAT_FACTOR_LU; 3769 mumps->sym = 0; 3770 } else { 3771 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3772 B->factortype = MAT_FACTOR_CHOLESKY; 3773 #if defined(PETSC_USE_COMPLEX) 3774 mumps->sym = 2; 3775 #else 3776 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3777 else mumps->sym = 2; 3778 #endif 3779 } 3780 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3781 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3782 3783 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3784 if (size == 1) { 3785 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3786 B->canuseordering = PETSC_TRUE; 3787 } 3788 3789 /* set solvertype */ 3790 PetscCall(PetscFree(B->solvertype)); 3791 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3792 B->ops->destroy = MatDestroy_MUMPS; 3793 B->data = (void *)mumps; 3794 3795 *F = B; 3796 mumps->id.job = JOB_NULL; 3797 mumps->ICNTL_pre = NULL; 3798 mumps->CNTL_pre = NULL; 3799 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3800 PetscFunctionReturn(PETSC_SUCCESS); 3801 } 3802 3803 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3804 { 3805 PetscFunctionBegin; 3806 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3807 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3808 
PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3809 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3810 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3811 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3812 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3813 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3814 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3815 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3816 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3817 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3818 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3819 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3820 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3821 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3822 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3823 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3824 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 3825 PetscFunctionReturn(PETSC_SUCCESS); 3826 } 3827