/*
   Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

#define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <cmumps_c.h>
  #else
    #include <zmumps_c.h>
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <smumps_c.h>
  #else
    #include <dmumps_c.h>
  #endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c cmumps_c
  #else
    #define MUMPS_c zmumps_c
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c smumps_c
  #else
    #define MUMPS_c dmumps_c
  #endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
   numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in the MUMPS headers when MUMPS is built in full 64-bit mode, therefore the macro is more reliable */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is the command line macro used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is a row/column index or some other small integer */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(PETSC_SUCCESS);
}
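/*
   A minimal sketch (not part of the original file) of how the cast above is meant to be used: every
   PetscInt index handed to MUMPS goes through PetscMUMPSIntCast() so that, in a --with-64-bit-indices
   build, an index that does not fit in a 32-bit MUMPS_INT fails cleanly instead of being silently
   truncated. The variable names below are hypothetical.

     PetscInt      petsc_row;   // global row index, possibly larger than 2^31 - 1 in a 64-bit-index build
     PetscMUMPSInt mumps_row;
     PetscCall(PetscMUMPSIntCast(petsc_row + 1, &mumps_row)); // +1 for MUMPS' Fortran-style 1-based indexing
*/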
/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;

  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

/* If using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
           to each process, so we only Bcast info[1], an error code, and leave the others (since they do not have \
           an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual p. 82. \
           omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
        */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MumpsScalar mumps_complex
  #else
    #define MumpsScalar mumps_double_complex
  #endif
#else
  #define MumpsScalar PetscScalar
#endif

/* macros so that indices match the MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]
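/*
   A hedged sketch (not library code) of the calling pattern used throughout this file: each MUMPS phase
   is selected through id.job, parameters are set through the 1-based ICNTL()/CNTL() macros above, the
   solver is invoked through PetscMUMPS_c(), and the MUMPS status is then checked through INFOG(1).
   Given a Mat_MUMPS *mumps (the context defined just below), the solve phase looks roughly like

     mumps->id.ICNTL(20) = 0;         // centralized dense right-hand side
     mumps->id.job       = JOB_SOLVE;
     PetscMUMPS_c(mumps);
     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
*/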
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
  #else
  ZMUMPS_STRUC_C id;
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
  #else
  DMUMPS_STRUC_C id;
  #endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;          /* the (i,j,v) triplets passed to MUMPS */
  PetscScalar   *val, *val_alloc;    /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer, hence val_alloc. */
  PetscInt64     nnz;                /* number of nonzeros. The type is called selective 64-bit in MUMPS */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) has changed since the previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info;       /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse RHS */
  PetscInt64     cur_ilen, cur_jlen;  /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscInt64  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by the PETSc/MUMPS OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is the PETSc matrix's comm */
  PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master;        /* is this rank the master of omp_comm? */
  MPI_Request *reqs;
};
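/*
   A small usage sketch (not part of this file): users normally do not touch the id struct above
   directly; ICNTL/CNTL values and diagnostics go through the MatMumps* wrappers registered near the
   end of this file, or through the options database. The variable "ndeficient" is hypothetical.

     PetscCall(MatMumpsSetIcntl(F, 24, 1));           // e.g. turn on null-pivot detection
     PetscCall(MatMumpsGetInfog(F, 28, &ndeficient)); // e.g. query the number of null pivots found
   or, from the command line,
     -mat_mumps_icntl_24 1
*/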
/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] holds the column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
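/*
   Illustration (not library code) of the 1-based CSR convention handled above, used when a sparse
   right-hand side is passed to MUMPS: the 3x3 matrix

     [1 0 2]
     [0 3 0]
     [4 0 5]

   is described by

     nrow = 3
     ia   = {1, 3, 4, 6}    // so nnz = ia[nrow] - 1 = 5
     ja   = {1, 3, 2, 1, 3}
*/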
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
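/*
   A hedged usage sketch (not part of this file) of how the Schur machinery above is driven from user
   code: one attaches the Schur indices to the factor, and the subsequent MatSolve()/MatMatSolve() calls
   handle the condensation and expansion phases (ICNTL(26)) internally. The IS is_schur is hypothetical,
   and the ordering/info arguments of the factorization calls are simply left as NULL here.

     Mat F;
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatFactorSetSchurIS(F, is_schur));            // rows/columns to condense on
     PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
     PetscCall(MatLUFactorNumeric(F, A, NULL));
     PetscCall(MatSolve(F, b, x));                           // condensation + Schur solve + expansion
*/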
/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in AIJ, BAIJ, or SBAIJ format
    shift   - 0: C style output triples; 1: Fortran style output triples
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
              MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and column indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/

static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
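/*
   Worked illustration (not library code) of the conversion above with shift = 1 (Fortran-style output,
   which is what MUMPS expects): for the SeqAIJ matrix

     [2 0 1]
     [0 3 0]    with 0-based CSR arrays  ai = {0, 2, 3, 5},  aj = {0, 2, 1, 0, 2}
     [4 0 5]

   the routine produces the triples

     irn = {1, 1, 2, 3, 3}
     jcn = {1, 3, 2, 1, 3}
     val = {2, 1, 3, 4, 5}
*/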
static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscInt64     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices];
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
  else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscInt64      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatGetBlockSize(A, &bs));
    M  = A->rmap->N / bs;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = aa->a;
  } else if (mumps->nest_vals)
    PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
  else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscInt64      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool isset, hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscInt64 alloc_size = aa->nz * bs2;

    PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
    if (bs > 1) {
      PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs > 1) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (ajj[j] > i || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
              }
              val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
  } else if (mumps->nest_vals)
    PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
  else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(PETSC_SUCCESS);
}
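/*
   Illustration (not library code) of the block filtering above for bs = 2: SBAIJ stores each 2x2 block
   in column-major order, so a diagonal block (ajj[j] == i)

     [a b]
     [c d]   is stored as {a, c, b, d}

   and the test (ajj[j] > i || k >= m) keeps only a (m=0,k=0), b (m=0,k=1), and d (m=1,k=1), i.e. the
   upper triangle of the block, while off-diagonal blocks (ajj[j] > i) are copied entirely.
*/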
static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j;
  const PetscScalar *av, *v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
  PetscBool          missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i + 1] - adiag[i];
        }
      }
    } else {
      for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) val[nz++] = v1[j];
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) val[nz++] = v1[j];
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
  PetscInt           bs;
  PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
PetscCall(MatGetBlockSize(A, &bs)); 670 rstart = A->rmap->rstart; 671 ai = aa->i; 672 aj = aa->j; 673 bi = bb->i; 674 bj = bb->j; 675 av = aa->a; 676 bv = bb->a; 677 678 garray = mat->garray; 679 680 if (reuse == MAT_INITIAL_MATRIX) { 681 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 682 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 683 PetscCall(PetscMalloc1(nz, &val)); 684 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 685 mumps->irn = row; 686 mumps->jcn = col; 687 mumps->val = mumps->val_alloc = val; 688 } else { 689 val = mumps->val; 690 } 691 692 jj = 0; 693 irow = rstart; 694 for (i = 0; i < mbs; i++) { 695 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 696 countA = ai[i + 1] - ai[i]; 697 countB = bi[i + 1] - bi[i]; 698 bjj = bj + bi[i]; 699 v1 = av + ai[i] * bs2; 700 v2 = bv + bi[i] * bs2; 701 702 if (bs > 1) { 703 /* A-part */ 704 for (j = 0; j < countA; j++) { 705 for (k = 0; k < bs; k++) { 706 for (m = 0; m < bs; m++) { 707 if (rstart + ajj[j] * bs > irow || k >= m) { 708 if (reuse == MAT_INITIAL_MATRIX) { 709 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 710 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 711 } 712 val[jj++] = v1[j * bs2 + m + k * bs]; 713 } 714 } 715 } 716 } 717 718 /* B-part */ 719 for (j = 0; j < countB; j++) { 720 for (k = 0; k < bs; k++) { 721 for (m = 0; m < bs; m++) { 722 if (reuse == MAT_INITIAL_MATRIX) { 723 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 724 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 725 } 726 val[jj++] = v2[j * bs2 + m + k * bs]; 727 } 728 } 729 } 730 } else { 731 /* A-part */ 732 for (j = 0; j < countA; j++) { 733 if (reuse == MAT_INITIAL_MATRIX) { 734 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 735 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 736 } 737 val[jj++] = v1[j]; 738 } 739 740 /* B-part */ 741 for (j = 0; j < countB; j++) { 742 if (reuse == MAT_INITIAL_MATRIX) { 743 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 744 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 745 } 746 val[jj++] = v2[j]; 747 } 748 } 749 irow += bs; 750 } 751 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 752 PetscFunctionReturn(PETSC_SUCCESS); 753 } 754 755 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 756 { 757 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 758 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 759 PetscMUMPSInt *row, *col; 760 const PetscScalar *av, *bv, *v1, *v2; 761 PetscScalar *val; 762 Mat Ad, Ao; 763 Mat_SeqAIJ *aa; 764 Mat_SeqAIJ *bb; 765 766 PetscFunctionBegin; 767 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 768 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 769 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 770 771 aa = (Mat_SeqAIJ *)(Ad)->data; 772 bb = (Mat_SeqAIJ *)(Ao)->data; 773 ai = aa->i; 774 aj = aa->j; 775 bi = bb->i; 776 bj = bb->j; 777 778 rstart = A->rmap->rstart; 779 cstart = A->cmap->rstart; 780 781 if (reuse == MAT_INITIAL_MATRIX) { 782 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 783 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 784 PetscCall(PetscMalloc1(nz, &val)); 785 mumps->nnz = nz; 786 mumps->irn = row; 787 mumps->jcn = col; 788 mumps->val = mumps->val_alloc = val; 789 } else { 790 val = mumps->val; 791 } 792 793 jj = 0; 794 irow = rstart; 795 for (i = 0; i < m; i++) { 796 ajj = aj + 
ai[i]; /* ptr to the beginning of this row */ 797 countA = ai[i + 1] - ai[i]; 798 countB = bi[i + 1] - bi[i]; 799 bjj = bj + bi[i]; 800 v1 = av + ai[i]; 801 v2 = bv + bi[i]; 802 803 /* A-part */ 804 for (j = 0; j < countA; j++) { 805 if (reuse == MAT_INITIAL_MATRIX) { 806 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 807 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 808 } 809 val[jj++] = v1[j]; 810 } 811 812 /* B-part */ 813 for (j = 0; j < countB; j++) { 814 if (reuse == MAT_INITIAL_MATRIX) { 815 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 816 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 817 } 818 val[jj++] = v2[j]; 819 } 820 irow++; 821 } 822 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 823 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 824 PetscFunctionReturn(PETSC_SUCCESS); 825 } 826 827 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 828 { 829 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 830 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 831 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 832 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 833 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 834 const PetscInt bs2 = mat->bs2; 835 PetscInt bs; 836 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 837 PetscMUMPSInt *row, *col; 838 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 839 PetscScalar *val; 840 841 PetscFunctionBegin; 842 PetscCall(MatGetBlockSize(A, &bs)); 843 if (reuse == MAT_INITIAL_MATRIX) { 844 nz = bs2 * (aa->nz + bb->nz); 845 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 846 PetscCall(PetscMalloc1(nz, &val)); 847 mumps->nnz = nz; 848 mumps->irn = row; 849 mumps->jcn = col; 850 mumps->val = mumps->val_alloc = val; 851 } else { 852 val = mumps->val; 853 } 854 855 jj = 0; 856 irow = rstart; 857 for (i = 0; i < mbs; i++) { 858 countA = ai[i + 1] - ai[i]; 859 countB = bi[i + 1] - bi[i]; 860 ajj = aj + ai[i]; 861 bjj = bj + bi[i]; 862 v1 = av + bs2 * ai[i]; 863 v2 = bv + bs2 * bi[i]; 864 865 idx = 0; 866 /* A-part */ 867 for (k = 0; k < countA; k++) { 868 for (j = 0; j < bs; j++) { 869 for (n = 0; n < bs; n++) { 870 if (reuse == MAT_INITIAL_MATRIX) { 871 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 872 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 873 } 874 val[jj++] = v1[idx++]; 875 } 876 } 877 } 878 879 idx = 0; 880 /* B-part */ 881 for (k = 0; k < countB; k++) { 882 for (j = 0; j < bs; j++) { 883 for (n = 0; n < bs; n++) { 884 if (reuse == MAT_INITIAL_MATRIX) { 885 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 886 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 887 } 888 val[jj++] = v2[idx++]; 889 } 890 } 891 } 892 irow += bs; 893 } 894 PetscFunctionReturn(PETSC_SUCCESS); 895 } 896 897 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 898 { 899 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 900 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 901 PetscMUMPSInt *row, *col; 902 const PetscScalar *av, *bv, *v1, *v2; 903 PetscScalar *val; 904 Mat Ad, Ao; 905 Mat_SeqAIJ *aa; 906 Mat_SeqAIJ *bb; 907 #if defined(PETSC_USE_COMPLEX) 908 PetscBool hermitian, isset; 909 #endif 910 911 PetscFunctionBegin; 912 #if defined(PETSC_USE_COMPLEX) 913 PetscCall(MatIsHermitianKnown(A, 
&isset, &hermitian)); 914 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 915 #endif 916 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 917 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 918 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 919 920 aa = (Mat_SeqAIJ *)(Ad)->data; 921 bb = (Mat_SeqAIJ *)(Ao)->data; 922 ai = aa->i; 923 aj = aa->j; 924 adiag = aa->diag; 925 bi = bb->i; 926 bj = bb->j; 927 928 rstart = A->rmap->rstart; 929 930 if (reuse == MAT_INITIAL_MATRIX) { 931 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 932 nzb = 0; /* num of upper triangular entries in mat->B */ 933 for (i = 0; i < m; i++) { 934 nza += (ai[i + 1] - adiag[i]); 935 countB = bi[i + 1] - bi[i]; 936 bjj = bj + bi[i]; 937 for (j = 0; j < countB; j++) { 938 if (garray[bjj[j]] > rstart) nzb++; 939 } 940 } 941 942 nz = nza + nzb; /* total nz of upper triangular part of mat */ 943 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 944 PetscCall(PetscMalloc1(nz, &val)); 945 mumps->nnz = nz; 946 mumps->irn = row; 947 mumps->jcn = col; 948 mumps->val = mumps->val_alloc = val; 949 } else { 950 val = mumps->val; 951 } 952 953 jj = 0; 954 irow = rstart; 955 for (i = 0; i < m; i++) { 956 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 957 v1 = av + adiag[i]; 958 countA = ai[i + 1] - adiag[i]; 959 countB = bi[i + 1] - bi[i]; 960 bjj = bj + bi[i]; 961 v2 = bv + bi[i]; 962 963 /* A-part */ 964 for (j = 0; j < countA; j++) { 965 if (reuse == MAT_INITIAL_MATRIX) { 966 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 967 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 968 } 969 val[jj++] = v1[j]; 970 } 971 972 /* B-part */ 973 for (j = 0; j < countB; j++) { 974 if (garray[bjj[j]] > rstart) { 975 if (reuse == MAT_INITIAL_MATRIX) { 976 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 977 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 978 } 979 val[jj++] = v2[j]; 980 } 981 } 982 irow++; 983 } 984 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 985 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 986 PetscFunctionReturn(PETSC_SUCCESS); 987 } 988 989 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 990 { 991 const PetscScalar *av; 992 const PetscInt M = A->rmap->n; 993 PetscInt64 i; 994 PetscMUMPSInt *row, *col; 995 Vec v; 996 997 PetscFunctionBegin; 998 PetscCall(MatDiagonalGetDiagonal(A, &v)); 999 PetscCall(VecGetArrayRead(v, &av)); 1000 if (reuse == MAT_INITIAL_MATRIX) { 1001 PetscCall(PetscMalloc2(M, &row, M, &col)); 1002 for (i = 0; i < M; i++) { 1003 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1004 col[i] = row[i]; 1005 } 1006 mumps->val = (PetscScalar *)av; 1007 mumps->irn = row; 1008 mumps->jcn = col; 1009 mumps->nnz = M; 1010 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1011 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1012 PetscCall(VecRestoreArrayRead(v, &av)); 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1017 { 1018 PetscScalar *v; 1019 const 
PetscInt m = A->rmap->n, N = A->cmap->N; 1020 PetscInt lda; 1021 PetscInt64 i, j; 1022 PetscMUMPSInt *row, *col; 1023 1024 PetscFunctionBegin; 1025 PetscCall(MatDenseGetArray(A, &v)); 1026 PetscCall(MatDenseGetLDA(A, &lda)); 1027 if (reuse == MAT_INITIAL_MATRIX) { 1028 PetscCall(PetscMalloc2(m * N, &row, m * N, &col)); 1029 for (i = 0; i < m; i++) { 1030 col[i] = 0; 1031 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1032 } 1033 for (j = 1; j < N; j++) { 1034 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j)); 1035 PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m)); 1036 } 1037 if (lda == m) mumps->val = v; 1038 else { 1039 PetscCall(PetscMalloc1(m * N, &mumps->val)); 1040 mumps->val_alloc = mumps->val; 1041 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1042 } 1043 mumps->irn = row; 1044 mumps->jcn = col; 1045 mumps->nnz = m * N; 1046 } else { 1047 if (lda == m && !mumps->nest_vals) mumps->val = v; 1048 else { 1049 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1050 } 1051 } 1052 PetscCall(MatDenseRestoreArray(A, &v)); 1053 PetscFunctionReturn(PETSC_SUCCESS); 1054 } 1055 1056 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1057 { 1058 Mat **mats; 1059 PetscInt nr, nc; 1060 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 1061 1062 PetscFunctionBegin; 1063 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1064 if (reuse == MAT_INITIAL_MATRIX) { 1065 PetscMUMPSInt *irns, *jcns; 1066 PetscScalar *vals; 1067 PetscInt64 totnnz, cumnnz, maxnnz; 1068 PetscInt *pjcns_w; 1069 IS *rows, *cols; 1070 PetscInt **rows_idx, **cols_idx; 1071 1072 cumnnz = 0; 1073 maxnnz = 0; 1074 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1075 for (PetscInt r = 0; r < nr; r++) { 1076 for (PetscInt c = 0; c < nc; c++) { 1077 Mat sub = mats[r][c]; 1078 1079 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1080 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1081 if (sub) { 1082 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1083 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag, isDense; 1084 MatInfo info; 1085 1086 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1087 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1088 else { 1089 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1090 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1091 } 1092 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1093 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1094 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1095 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1096 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1097 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1098 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1099 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 1100 1101 if (chol) { 1102 if (r == c) { 1103 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 
1104 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1105 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1106 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1107 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1108 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1109 } else { 1110 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1111 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1112 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1113 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1114 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1115 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1116 } 1117 } else { 1118 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1119 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1120 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1121 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1122 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1123 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1124 } 1125 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1126 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1127 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1128 cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 1129 maxnnz = PetscMax(maxnnz, info.nz_used); 1130 } 1131 } 1132 } 1133 1134 /* Allocate total COO */ 1135 totnnz = cumnnz; 1136 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1137 PetscCall(PetscMalloc1(totnnz, &vals)); 1138 1139 /* Handle rows and column maps 1140 We directly map rows and use an SF for the columns */ 1141 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1142 PetscCall(MatNestGetISs(A, rows, cols)); 1143 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1144 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1145 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1146 else (void)maxnnz; 1147 1148 cumnnz = 0; 1149 for (PetscInt r = 0; r < nr; r++) { 1150 for (PetscInt c = 0; c < nc; c++) { 1151 Mat sub = mats[r][c]; 1152 const PetscInt *ridx = rows_idx[r]; 1153 const PetscInt *cidx = cols_idx[c]; 1154 PetscInt rst; 1155 PetscSF csf; 1156 PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 1157 PetscLayout cmap; 1158 1159 mumps->nest_vals_start[r * nc + c] = cumnnz; 1160 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1161 1162 /* Extract inner blocks if needed */ 1163 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1164 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1165 else { 1166 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1167 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1168 } 1169 swap = (PetscBool)(isTrans || isHTrans); 1170 1171 /* Get column layout to map off-process columns */ 1172 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1173 1174 /* Get row start to map on-process 
rows */ 1175 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1176 1177 /* Directly use the mumps datastructure and use C ordering for now */ 1178 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1179 1180 /* Swap the role of rows and columns indices for transposed blocks 1181 since we need values with global final ordering */ 1182 if (swap) { 1183 cidx = rows_idx[r]; 1184 ridx = cols_idx[c]; 1185 } 1186 1187 /* Communicate column indices 1188 This could have been done with a single SF but it would have complicated the code a lot. 1189 But since we do it only once, we pay the price of setting up an SF for each block */ 1190 if (PetscDefined(USE_64BIT_INDICES)) { 1191 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1192 } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 1193 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1194 PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1195 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1196 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1197 PetscCall(PetscSFDestroy(&csf)); 1198 1199 /* Import indices: use direct map for rows and mapped indices for columns */ 1200 if (swap) { 1201 for (PetscInt k = 0; k < mumps->nnz; k++) { 1202 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1203 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1204 } 1205 } else { 1206 for (PetscInt k = 0; k < mumps->nnz; k++) { 1207 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1208 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1209 } 1210 } 1211 1212 /* Import values to full COO */ 1213 PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1214 if (isHTrans) { /* conjugate the entries */ 1215 PetscScalar *v = vals + cumnnz; 1216 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]); 1217 } 1218 1219 /* Shift new starting point and sanity check */ 1220 cumnnz += mumps->nnz; 1221 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1222 1223 /* Free scratch memory */ 1224 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1225 PetscCall(PetscFree(mumps->val_alloc)); 1226 mumps->val = NULL; 1227 mumps->nnz = 0; 1228 } 1229 } 1230 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1231 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1232 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1233 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1234 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1235 mumps->nest_vals_start[nr * nc] = cumnnz; 1236 1237 /* Set pointers for final MUMPS data structure */ 1238 mumps->nest_vals = vals; 1239 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1240 mumps->val = vals; 1241 mumps->irn = irns; 1242 mumps->jcn = jcns; 1243 mumps->nnz = cumnnz; 1244 } else { 1245 PetscScalar *oval = mumps->nest_vals; 1246 for (PetscInt r = 0; r < nr; r++) { 1247 for (PetscInt c = 0; c < nc; c++) { 1248 
PetscBool isTrans, isHTrans = PETSC_FALSE; 1249 Mat sub = mats[r][c]; 1250 PetscInt midx = r * nc + c; 1251 1252 if (!mumps->nest_convert_to_triples[midx]) continue; 1253 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1254 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1255 else { 1256 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1257 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1258 } 1259 mumps->val = oval + mumps->nest_vals_start[midx]; 1260 PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1261 if (isHTrans) { 1262 PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1263 for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1264 } 1265 } 1266 } 1267 mumps->val = oval; 1268 } 1269 PetscFunctionReturn(PETSC_SUCCESS); 1270 } 1271 1272 static PetscErrorCode MatDestroy_MUMPS(Mat A) 1273 { 1274 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1275 1276 PetscFunctionBegin; 1277 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1278 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1279 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1280 PetscCall(VecDestroy(&mumps->b_seq)); 1281 PetscCall(VecDestroy(&mumps->x_seq)); 1282 PetscCall(PetscFree(mumps->id.perm_in)); 1283 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1284 PetscCall(PetscFree(mumps->val_alloc)); 1285 PetscCall(PetscFree(mumps->info)); 1286 PetscCall(PetscFree(mumps->ICNTL_pre)); 1287 PetscCall(PetscFree(mumps->CNTL_pre)); 1288 PetscCall(MatMumpsResetSchur_Private(mumps)); 1289 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1290 mumps->id.job = JOB_END; 1291 PetscMUMPS_c(mumps); 1292 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1293 if (mumps->mumps_comm != MPI_COMM_NULL) { 1294 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1295 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1296 } 1297 } 1298 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1299 if (mumps->use_petsc_omp_support) { 1300 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1301 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1302 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1303 } 1304 #endif 1305 PetscCall(PetscFree(mumps->ia_alloc)); 1306 PetscCall(PetscFree(mumps->ja_alloc)); 1307 PetscCall(PetscFree(mumps->recvcount)); 1308 PetscCall(PetscFree(mumps->reqs)); 1309 PetscCall(PetscFree(mumps->irhs_loc)); 1310 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1311 PetscCall(PetscFree(mumps->nest_vals)); 1312 PetscCall(PetscFree(A->data)); 1313 1314 /* clear composed functions */ 1315 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1316 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1317 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1318 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1319 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1320 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", 
NULL)); 1321 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1322 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1323 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1324 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1325 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1326 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1327 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1328 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1329 PetscFunctionReturn(PETSC_SUCCESS); 1330 } 1331 1332 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1333 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1334 { 1335 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1336 const PetscMPIInt ompsize = mumps->omp_comm_size; 1337 PetscInt i, m, M, rstart; 1338 1339 PetscFunctionBegin; 1340 PetscCall(MatGetSize(A, &M, NULL)); 1341 PetscCall(MatGetLocalSize(A, &m, NULL)); 1342 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1343 if (ompsize == 1) { 1344 if (!mumps->irhs_loc) { 1345 mumps->nloc_rhs = m; 1346 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1347 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1348 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1349 } 1350 mumps->id.rhs_loc = (MumpsScalar *)array; 1351 } else { 1352 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1353 const PetscInt *ranges; 1354 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1355 MPI_Group petsc_group, omp_group; 1356 PetscScalar *recvbuf = NULL; 1357 1358 if (mumps->is_omp_master) { 1359 /* Lazily initialize the omp stuff for distributed rhs */ 1360 if (!mumps->irhs_loc) { 1361 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1362 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1363 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1364 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1365 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1366 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1367 1368 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1369 mumps->nloc_rhs = 0; 1370 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1371 for (j = 0; j < ompsize; j++) { 1372 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1373 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1374 } 1375 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1376 for (j = k = 0; j < ompsize; j++) { 1377 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1378 } 1379 1380 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1381 PetscCallMPI(MPI_Group_free(&petsc_group)); 1382 PetscCallMPI(MPI_Group_free(&omp_group)); 1383 } 1384 1385 /* Realloc buffers when current nrhs is bigger than what we have met */ 1386 if (nrhs > mumps->max_nrhs) { 1387 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1388 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, 
&mumps->rhs_recvbuf)); 1389 mumps->max_nrhs = nrhs; 1390 } 1391 1392 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1393 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1394 mumps->rhs_disps[0] = 0; 1395 for (j = 1; j < ompsize; j++) { 1396 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1397 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1398 } 1399 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1400 } 1401 1402 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1403 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1404 1405 if (mumps->is_omp_master) { 1406 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1407 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1408 for (j = 0; j < ompsize; j++) { 1409 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1410 dst = dstbase; 1411 for (i = 0; i < nrhs; i++) { 1412 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1413 src += mumps->rhs_nrow[j]; 1414 dst += mumps->nloc_rhs; 1415 } 1416 dstbase += mumps->rhs_nrow[j]; 1417 } 1418 } 1419 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1420 } 1421 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1422 } 1423 mumps->id.nrhs = nrhs; 1424 mumps->id.nloc_rhs = mumps->nloc_rhs; 1425 mumps->id.lrhs_loc = mumps->nloc_rhs; 1426 mumps->id.irhs_loc = mumps->irhs_loc; 1427 PetscFunctionReturn(PETSC_SUCCESS); 1428 } 1429 1430 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1431 { 1432 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1433 const PetscScalar *rarray = NULL; 1434 PetscScalar *array; 1435 IS is_iden, is_petsc; 1436 PetscInt i; 1437 PetscBool second_solve = PETSC_FALSE; 1438 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1439 1440 PetscFunctionBegin; 1441 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1442 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1443 &cite1)); 1444 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1445 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1446 &cite2)); 1447 1448 if (A->factorerrortype) { 1449 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1450 PetscCall(VecSetInf(x)); 1451 PetscFunctionReturn(PETSC_SUCCESS); 1452 } 1453 1454 mumps->id.nrhs = 1; 1455 if (mumps->petsc_size > 1) { 1456 if (mumps->ICNTL20 == 10) { 1457 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1458 PetscCall(VecGetArrayRead(b, &rarray)); 1459 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1460 } else { 1461 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */ 1462 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1463 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1464 if (!mumps->myid) { 1465 PetscCall(VecGetArray(mumps->b_seq, &array)); 1466 mumps->id.rhs = (MumpsScalar *)array; 1467 } 1468 } 1469 } else { /* petsc_size == 1 */ 1470 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1471 PetscCall(VecCopy(b, x)); 1472 PetscCall(VecGetArray(x, &array)); 1473 mumps->id.rhs = (MumpsScalar *)array; 1474 } 1475 1476 /* 1477 handle condensation step of Schur complement (if any) 1478 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1479 According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase. 1480 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1481 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1482 */ 1483 if (mumps->id.size_schur > 0) { 1484 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1485 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1486 second_solve = PETSC_TRUE; 1487 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1488 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1489 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1490 } 1491 /* solve phase */ 1492 mumps->id.job = JOB_SOLVE; 1493 PetscMUMPS_c(mumps); 1494 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1495 1496 /* handle expansion step of Schur complement (if any) */ 1497 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1498 else if (mumps->id.ICNTL(26) == 1) { 1499 PetscCall(MatMumpsSolveSchur_Private(A)); 1500 for (i = 0; i < mumps->id.size_schur; ++i) { 1501 #if !defined(PETSC_USE_COMPLEX) 1502 PetscScalar val = mumps->id.redrhs[i]; 1503 #else 1504 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1505 #endif 1506 array[mumps->id.listvar_schur[i] - 1] = val; 1507 } 1508 } 1509 1510 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1511 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1512 /* when id.ICNTL(9) changes, the contents of isol_loc[] may change (though its size lsol_loc does not), so recreate scat_sol */ 1513 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1514 } 1515 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1516 PetscInt *isol2_loc
= NULL; 1517 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1518 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1519 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1520 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1521 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1522 PetscCall(ISDestroy(&is_iden)); 1523 PetscCall(ISDestroy(&is_petsc)); 1524 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1525 } 1526 1527 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1528 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1529 } 1530 1531 if (mumps->petsc_size > 1) { 1532 if (mumps->ICNTL20 == 10) { 1533 PetscCall(VecRestoreArrayRead(b, &rarray)); 1534 } else if (!mumps->myid) { 1535 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1536 } 1537 } else PetscCall(VecRestoreArray(x, &array)); 1538 1539 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1540 PetscFunctionReturn(PETSC_SUCCESS); 1541 } 1542 1543 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1544 { 1545 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1546 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1547 1548 PetscFunctionBegin; 1549 mumps->id.ICNTL(9) = 0; 1550 PetscCall(MatSolve_MUMPS(A, b, x)); 1551 mumps->id.ICNTL(9) = value; 1552 PetscFunctionReturn(PETSC_SUCCESS); 1553 } 1554 1555 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1556 { 1557 Mat Bt = NULL; 1558 PetscBool denseX, denseB, flg, flgT; 1559 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1560 PetscInt i, nrhs, M; 1561 PetscScalar *array; 1562 const PetscScalar *rbray; 1563 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1564 PetscMUMPSInt *isol_loc, *isol_loc_save; 1565 PetscScalar *bray, *sol_loc, *sol_loc_save; 1566 IS is_to, is_from; 1567 PetscInt k, proc, j, m, myrstart; 1568 const PetscInt *rstart; 1569 Vec v_mpi, msol_loc; 1570 VecScatter scat_sol; 1571 Vec b_seq; 1572 VecScatter scat_rhs; 1573 PetscScalar *aa; 1574 PetscInt spnr, *ia, *ja; 1575 Mat_MPIAIJ *b = NULL; 1576 1577 PetscFunctionBegin; 1578 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1579 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1580 1581 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1582 if (denseB) { 1583 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1584 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1585 } else { /* sparse B */ 1586 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1587 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1588 if (flgT) { /* input B is transpose of actual RHS matrix, 1589 because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 1590 PetscCall(MatTransposeGetMat(B, &Bt)); 1591 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1592 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1593 } 1594 1595 PetscCall(MatGetSize(B, &M, &nrhs)); 1596 mumps->id.nrhs = nrhs; 1597 mumps->id.lrhs = M; 1598 mumps->id.rhs = NULL; 1599 1600 if (mumps->petsc_size == 1) { 1601 PetscScalar *aa; 1602 PetscInt spnr, *ia, *ja; 1603 PetscBool second_solve = PETSC_FALSE; 1604 1605 PetscCall(MatDenseGetArray(X, &array)); 1606 mumps->id.rhs = (MumpsScalar *)array; 1607 1608 if (denseB) { 1609 /* copy B to X */ 1610 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1611 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1612 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1613 } else { /* sparse B */ 1614 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1615 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1616 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1617 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1618 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1619 } 1620 /* handle condensation step of Schur complement (if any) */ 1621 if (mumps->id.size_schur > 0) { 1622 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1623 second_solve = PETSC_TRUE; 1624 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1625 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1626 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1627 } 1628 /* solve phase */ 1629 mumps->id.job = JOB_SOLVE; 1630 PetscMUMPS_c(mumps); 1631 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1632 1633 /* handle expansion step of Schur complement (if any) */ 1634 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1635 else if (mumps->id.ICNTL(26) == 1) { 1636 PetscCall(MatMumpsSolveSchur_Private(A)); 1637 for (j = 0; j < nrhs; ++j) 1638 for (i = 0; i < mumps->id.size_schur; ++i) { 1639 #if !defined(PETSC_USE_COMPLEX) 1640 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1641 #else 1642 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1643 #endif 1644 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1645 } 1646 } 1647 if (!denseB) { /* sparse B */ 1648 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1649 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1650 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1651 } 1652 PetscCall(MatDenseRestoreArray(X, &array)); 1653 PetscFunctionReturn(PETSC_SUCCESS); 1654 } 1655 1656 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
*/ 1657 PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1658 1659 /* create msol_loc to hold mumps local solution */ 1660 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1661 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1662 1663 lsol_loc = mumps->id.lsol_loc; 1664 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1665 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1666 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1667 mumps->id.isol_loc = isol_loc; 1668 1669 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1670 1671 if (denseB) { 1672 if (mumps->ICNTL20 == 10) { 1673 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1674 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1675 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1676 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1677 PetscCall(MatGetLocalSize(B, &m, NULL)); 1678 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1679 } else { 1680 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1681 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1682 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1683 0, re-arrange B into desired order, which is a local operation. 1684 */ 1685 1686 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1687 /* wrap dense rhs matrix B into a vector v_mpi */ 1688 PetscCall(MatGetLocalSize(B, &m, NULL)); 1689 PetscCall(MatDenseGetArray(B, &bray)); 1690 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1691 PetscCall(MatDenseRestoreArray(B, &bray)); 1692 1693 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1694 if (!mumps->myid) { 1695 PetscInt *idx; 1696 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1697 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1698 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1699 k = 0; 1700 for (proc = 0; proc < mumps->petsc_size; proc++) { 1701 for (j = 0; j < nrhs; j++) { 1702 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1703 } 1704 } 1705 1706 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1707 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1708 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1709 } else { 1710 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1711 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1712 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1713 } 1714 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1715 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1716 PetscCall(ISDestroy(&is_to)); 1717 PetscCall(ISDestroy(&is_from)); 1718 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1719 1720 if (!mumps->myid) { /* define rhs on the host */ 1721 PetscCall(VecGetArray(b_seq, &bray)); 1722 mumps->id.rhs = (MumpsScalar *)bray; 1723 PetscCall(VecRestoreArray(b_seq, &bray)); 1724 } 1725 } 1726 } else { /* sparse B */ 1727 b = (Mat_MPIAIJ *)Bt->data; 1728 1729 /* wrap dense X into a vector v_mpi */ 1730 PetscCall(MatGetLocalSize(X, &m, NULL)); 1731 PetscCall(MatDenseGetArray(X, &bray)); 1732 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1733 PetscCall(MatDenseRestoreArray(X, &bray)); 1734 1735 if (!mumps->myid) { 1736 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1737 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1738 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1739 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1740 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1741 } else { 1742 mumps->id.irhs_ptr = NULL; 1743 mumps->id.irhs_sparse = NULL; 1744 mumps->id.nz_rhs = 0; 1745 mumps->id.rhs_sparse = NULL; 1746 } 1747 } 1748 1749 /* solve phase */ 1750 mumps->id.job = JOB_SOLVE; 1751 PetscMUMPS_c(mumps); 1752 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1753 1754 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1755 PetscCall(MatDenseGetArray(X, &array)); 1756 PetscCall(VecPlaceArray(v_mpi, array)); 1757 1758 /* create scatter scat_sol */ 1759 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1760 /* iidx: index for scatter mumps solution to petsc X */ 1761 1762 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1763 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1764 for (i = 0; i < lsol_loc; i++) { 1765 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1766 1767 for (proc = 0; proc < mumps->petsc_size; proc++) { 1768 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1769 myrstart = rstart[proc]; 1770 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1771 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1772 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1773 break; 1774 } 1775 } 1776 1777 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1778 } 1779 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1780 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1781 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1782 PetscCall(ISDestroy(&is_from)); 1783 PetscCall(ISDestroy(&is_to)); 1784 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1785 PetscCall(MatDenseRestoreArray(X, &array)); 1786 1787 /* free spaces */ 1788 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1789 mumps->id.isol_loc = isol_loc_save; 1790 1791 PetscCall(PetscFree2(sol_loc, isol_loc)); 1792 PetscCall(PetscFree(idxx)); 1793 PetscCall(VecDestroy(&msol_loc)); 1794 PetscCall(VecDestroy(&v_mpi)); 1795 if (!denseB) { 1796 if (!mumps->myid) { 1797 b = (Mat_MPIAIJ *)Bt->data; 1798 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1799 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1800 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1801 } 1802 } else { 1803 if (mumps->ICNTL20 == 0) { 1804 PetscCall(VecDestroy(&b_seq)); 1805 PetscCall(VecScatterDestroy(&scat_rhs)); 1806 } 1807 } 1808 PetscCall(VecScatterDestroy(&scat_sol)); 1809 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1810 PetscFunctionReturn(PETSC_SUCCESS); 1811 } 1812 1813 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1814 { 1815 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1816 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1817 1818 PetscFunctionBegin; 1819 mumps->id.ICNTL(9) = 0; 1820 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1821 mumps->id.ICNTL(9) = value; 1822 PetscFunctionReturn(PETSC_SUCCESS); 1823 } 1824 1825 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1826 { 1827 PetscBool flg; 1828 Mat B; 1829 1830 PetscFunctionBegin; 1831 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1832 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1833 1834 /* Create B=Bt^T that uses Bt's data structure */ 1835 PetscCall(MatCreateTranspose(Bt, &B)); 1836 1837 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1838 PetscCall(MatDestroy(&B)); 1839 PetscFunctionReturn(PETSC_SUCCESS); 1840 } 1841 1842 #if !defined(PETSC_USE_COMPLEX) 1843 /* 1844 input: 1845 F: numeric factor 1846 output: 1847 nneg: total number of negative pivots 1848 nzero: total number of zero pivots 1849 npos: (global dimension of F) - nneg - nzero 1850 */ 1851 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1852 { 1853 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1854 PetscMPIInt size; 1855 1856 PetscFunctionBegin; 1857 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1858 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1859 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13)); 1860 1861 if (nneg) *nneg = mumps->id.INFOG(12); 1862 if (nzero || npos) { 1863 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection"); 1864 if (nzero) *nzero = mumps->id.INFOG(28); 1865 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1866 } 1867 PetscFunctionReturn(PETSC_SUCCESS); 1868 } 1869 #endif 1870 1871 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1872 { 1873 PetscInt i, nreqs; 1874 PetscMUMPSInt *irn, *jcn; 1875 PetscMPIInt count; 1876 PetscInt64 totnnz, remain; 1877 const PetscInt osize = mumps->omp_comm_size; 1878 PetscScalar *val; 1879 1880 PetscFunctionBegin; 1881 if (osize > 1) { 1882 if (reuse == MAT_INITIAL_MATRIX) { 1883 /* master first gathers counts of nonzeros to receive */ 1884 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1885 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1886 1887 /* Then each computes number of send/recvs */ 1888 if (mumps->is_omp_master) { 1889 /* Start from 1 since self communication is not done via MPI */ 1890 nreqs = 0; 1891 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1892 } else { 1893 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1894 } 1895 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1896 1897 /* The following code does a simple thing: the omp_master rank gathers irn/jcn/val from the others. 1898 MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz 1899 might even be a prime number > 2^31-1, we have to slice the messages. Note omp_comm_size 1900 is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
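Sender and receiver loop over matching chunks of at most PETSC_MPI_INT_MAX entries, so each MPI_Isend below is paired with an MPI_Irecv of the same length on the master rank.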
1901 */ 1902 nreqs = 0; /* counter for actual send/recvs */ 1903 if (mumps->is_omp_master) { 1904 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1905 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1906 PetscCall(PetscMalloc1(totnnz, &val)); 1907 1908 /* Self communication */ 1909 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1910 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1911 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1912 1913 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1914 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1915 PetscCall(PetscFree(mumps->val_alloc)); 1916 mumps->nnz = totnnz; 1917 mumps->irn = irn; 1918 mumps->jcn = jcn; 1919 mumps->val = mumps->val_alloc = val; 1920 1921 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1922 jcn += mumps->recvcount[0]; 1923 val += mumps->recvcount[0]; 1924 1925 /* Remote communication */ 1926 for (i = 1; i < osize; i++) { 1927 count = PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 1928 remain = mumps->recvcount[i] - count; 1929 while (count > 0) { 1930 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1931 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1932 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1933 irn += count; 1934 jcn += count; 1935 val += count; 1936 count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1937 remain -= count; 1938 } 1939 } 1940 } else { 1941 irn = mumps->irn; 1942 jcn = mumps->jcn; 1943 val = mumps->val; 1944 count = PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 1945 remain = mumps->nnz - count; 1946 while (count > 0) { 1947 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1948 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1949 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1950 irn += count; 1951 jcn += count; 1952 val += count; 1953 count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1954 remain -= count; 1955 } 1956 } 1957 } else { 1958 nreqs = 0; 1959 if (mumps->is_omp_master) { 1960 val = mumps->val + mumps->recvcount[0]; 1961 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1962 count = PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 1963 remain = mumps->recvcount[i] - count; 1964 while (count > 0) { 1965 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1966 val += count; 1967 count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1968 remain -= count; 1969 } 1970 } 1971 } else { 1972 val = mumps->val; 1973 count = PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 1974 remain = mumps->nnz - count; 1975 while (count > 0) { 1976 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1977 val += count; 1978 count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1979 remain -= count; 1980 } 1981 } 1982 } 1983 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1984 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1985 } 1986 
PetscFunctionReturn(PETSC_SUCCESS); 1987 } 1988 1989 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info) 1990 { 1991 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1992 PetscBool isMPIAIJ; 1993 1994 PetscFunctionBegin; 1995 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { 1996 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1997 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1998 PetscFunctionReturn(PETSC_SUCCESS); 1999 } 2000 2001 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 2002 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2003 2004 /* numerical factorization phase */ 2005 mumps->id.job = JOB_FACTNUMERIC; 2006 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2007 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 2008 } else { 2009 mumps->id.a_loc = (MumpsScalar *)mumps->val; 2010 } 2011 PetscMUMPS_c(mumps); 2012 if (mumps->id.INFOG(1) < 0) { 2013 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2014 if (mumps->id.INFOG(1) == -10) { 2015 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2016 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2017 } else if (mumps->id.INFOG(1) == -13) { 2018 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2019 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2020 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2021 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2022 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2023 } else { 2024 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2025 F->factorerrortype = MAT_FACTOR_OTHER; 2026 } 2027 } 2028 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 2029 2030 F->assembled = PETSC_TRUE; 2031 2032 if (F->schur) { /* reset Schur status to unfactored */ 2033 #if defined(PETSC_HAVE_CUDA) 2034 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 2035 #endif 2036 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2037 mumps->id.ICNTL(19) = 2; 2038 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2039 } 2040 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2041 } 2042 2043 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2044 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2045 2046 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2047 if (mumps->petsc_size > 1) { 2048 PetscInt lsol_loc; 2049 PetscScalar *sol_loc; 2050 2051
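/* With a distributed solution (ICNTL(21)=1), MUMPS returns its local piece in sol_loc/isol_loc of length INFO(23); wrap sol_loc in the sequential vector x_seq so MatSolve_MUMPS() can later scatter it into the PETSc layout */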
PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 2052 2053 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2054 if (mumps->x_seq) { 2055 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2056 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2057 PetscCall(VecDestroy(&mumps->x_seq)); 2058 } 2059 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2060 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2061 mumps->id.lsol_loc = lsol_loc; 2062 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2063 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2064 } 2065 PetscCall(PetscLogFlops((double)mumps->id.RINFO(2))); 2066 PetscFunctionReturn(PETSC_SUCCESS); 2067 } 2068 2069 /* Sets MUMPS options from the options database */ 2070 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2071 { 2072 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2073 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2074 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2075 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2076 MumpsScalar *arr; 2077 2078 PetscFunctionBegin; 2079 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2080 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2081 PetscInt nthreads = 0; 2082 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2083 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2084 2085 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2086 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2087 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2088 2089 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2090 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2091 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2092 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2093 if (mumps->use_petsc_omp_support) { 2094 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2095 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2096 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 2097 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2098 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2099 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2100 #endif 2101 } else { 2102 mumps->omp_comm = PETSC_COMM_SELF; 2103 mumps->mumps_comm = mumps->petsc_comm; 2104 mumps->is_omp_master = PETSC_TRUE; 2105 } 2106 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2107 mumps->reqs = NULL; 2108 mumps->tag = 0; 2109 2110 if (mumps->mumps_comm != MPI_COMM_NULL) { 2111 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2112 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2113 MPI_Comm comm; 2114 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2115 mumps->mumps_comm = comm; 2116 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2117 } 2118 2119 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2120 mumps->id.job = JOB_INIT; 2121 mumps->id.par = 1; /* host participates in factorization and solve */ 2122 mumps->id.sym = mumps->sym; 2123 2124 size = mumps->id.size_schur; 2125 arr = mumps->id.schur; 2126 listvar_schur = mumps->id.listvar_schur; 2127 PetscMUMPS_c(mumps); 2128 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2129 /* restore cached ICNTL and CNTL values */ 2130 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2131 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2132 PetscCall(PetscFree(mumps->ICNTL_pre)); 2133 PetscCall(PetscFree(mumps->CNTL_pre)); 2134 2135 if (schur) { 2136 mumps->id.size_schur = size; 2137 mumps->id.schur_lld = size; 2138 mumps->id.schur = arr; 2139 mumps->id.listvar_schur = listvar_schur; 2140 if (mumps->petsc_size > 1) { 2141 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2142 2143 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2144 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2145 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2146 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2147 } else { 2148 if (F->factortype == MAT_FACTOR_LU) { 2149 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2150 } else { 2151 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2152 } 2153 } 2154 mumps->id.ICNTL(26) = -1; 2155 } 2156 2157 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2158 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
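When PETSc OpenMP support is not in use, omp_comm is PETSC_COMM_SELF and the broadcasts below are effectively no-ops.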
2159 */ 2160 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2161 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2162 2163 mumps->scat_rhs = NULL; 2164 mumps->scat_sol = NULL; 2165 2166 /* set PETSc-MUMPS default options - override MUMPS default */ 2167 mumps->id.ICNTL(3) = 0; 2168 mumps->id.ICNTL(4) = 0; 2169 if (mumps->petsc_size == 1) { 2170 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2171 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2172 } else { 2173 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2174 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2175 } 2176 } 2177 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2178 if (flg) mumps->id.ICNTL(1) = icntl; 2179 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2180 if (flg) mumps->id.ICNTL(2) = icntl; 2181 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2182 if (flg) mumps->id.ICNTL(3) = icntl; 2183 2184 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2185 if (flg) mumps->id.ICNTL(4) = icntl; 2186 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2187 2188 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2189 if (flg) mumps->id.ICNTL(6) = icntl; 2190 2191 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2192 if (flg) { 2193 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2194 mumps->id.ICNTL(7) = icntl; 2195 } 2196 2197 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2198 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2199 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2200 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2201 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2202 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2203 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2204 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2205 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2206 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2207 if (flg) { 2208 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2209 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2210 } 2211 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2212 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2213 PetscCall(MatDestroy(&F->schur)); 2214 PetscCall(MatMumpsResetSchur_Private(mumps)); 2215 } 2216 2217 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2218 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2219 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2220 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2221 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 
2222 In short, with MPICH we could not use distributed RHS until MPICH v4.0b1, unless the workaround available in mumps-5.6.2+ is enabled 2223 */ 2224 #if PETSC_PKG_MUMPS_VERSION_GE(5, 6, 2) && defined(PETSC_HAVE_MUMPS_AVOID_MPI_IN_PLACE) 2225 mumps->ICNTL20 = 10; 2226 #elif PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2227 mumps->ICNTL20 = 0; /* Centralized dense RHS */ 2228 #else 2229 mumps->ICNTL20 = 10; /* Distributed dense RHS */ 2230 #endif 2231 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2232 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2233 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2234 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2235 #endif 2236 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2237 2238 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2239 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2240 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2241 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ } 2242 2243 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2244 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix is present", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2245 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2246 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2247 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2248 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2249 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2250 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the
right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2251 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2252 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2253 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2254 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2255 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2256 2257 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2258 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2259 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2260 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2261 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2262 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2263 2264 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2265 2266 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2267 if (ninfo) { 2268 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo); 2269 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2270 mumps->ninfo = ninfo; 2271 for (i = 0; i < ninfo; i++) { 2272 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]); 2273 mumps->info[i] = info[i]; 2274 } 2275 } 2276 PetscOptionsEnd(); 2277 PetscFunctionReturn(PETSC_SUCCESS); 2278 } 2279 2280 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2281 { 2282 PetscFunctionBegin; 2283 if (mumps->id.INFOG(1) < 0) { 2284 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2285 if (mumps->id.INFOG(1) == -6) { 2286 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2287 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2288 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2289 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2290 F->factorerrortype =
MAT_FACTOR_OUTMEMORY; 2291 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { 2292 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2293 } else { 2294 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2295 F->factorerrortype = MAT_FACTOR_OTHER; 2296 } 2297 } 2298 PetscFunctionReturn(PETSC_SUCCESS); 2299 } 2300 2301 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2302 { 2303 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2304 Vec b; 2305 const PetscInt M = A->rmap->N; 2306 2307 PetscFunctionBegin; 2308 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2309 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2310 PetscFunctionReturn(PETSC_SUCCESS); 2311 } 2312 2313 /* Set MUMPS options from the options database */ 2314 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2315 2316 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2317 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2318 2319 /* analysis phase */ 2320 mumps->id.job = JOB_FACTSYMBOLIC; 2321 mumps->id.n = M; 2322 switch (mumps->id.ICNTL(18)) { 2323 case 0: /* centralized assembled matrix input */ 2324 if (!mumps->myid) { 2325 mumps->id.nnz = mumps->nnz; 2326 mumps->id.irn = mumps->irn; 2327 mumps->id.jcn = mumps->jcn; 2328 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2329 if (r) { 2330 mumps->id.ICNTL(7) = 1; 2331 if (!mumps->myid) { 2332 const PetscInt *idx; 2333 PetscInt i; 2334 2335 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2336 PetscCall(ISGetIndices(r, &idx)); 2337 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2338 PetscCall(ISRestoreIndices(r, &idx)); 2339 } 2340 } 2341 } 2342 break; 2343 case 3: /* distributed assembled matrix input (size>1) */ 2344 mumps->id.nnz_loc = mumps->nnz; 2345 mumps->id.irn_loc = mumps->irn; 2346 mumps->id.jcn_loc = mumps->jcn; 2347 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2348 if (mumps->ICNTL20 == 0) { /* Centralized rhs.
Create scatter scat_rhs for repeated use in MatSolve() */ 2349 PetscCall(MatCreateVecs(A, NULL, &b)); 2350 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2351 PetscCall(VecDestroy(&b)); 2352 } 2353 break; 2354 } 2355 PetscMUMPS_c(mumps); 2356 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2357 2358 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2359 F->ops->solve = MatSolve_MUMPS; 2360 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2361 F->ops->matsolve = MatMatSolve_MUMPS; 2362 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2363 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2364 2365 mumps->matstruc = SAME_NONZERO_PATTERN; 2366 PetscFunctionReturn(PETSC_SUCCESS); 2367 } 2368 2369 /* Note the Petsc r and c permutations are ignored */ 2370 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2371 { 2372 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2373 Vec b; 2374 const PetscInt M = A->rmap->N; 2375 2376 PetscFunctionBegin; 2377 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2378 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2379 PetscFunctionReturn(PETSC_SUCCESS); 2380 } 2381 2382 /* Set MUMPS options from the options database */ 2383 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2384 2385 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2386 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2387 2388 /* analysis phase */ 2389 mumps->id.job = JOB_FACTSYMBOLIC; 2390 mumps->id.n = M; 2391 switch (mumps->id.ICNTL(18)) { 2392 case 0: /* centralized assembled matrix input */ 2393 if (!mumps->myid) { 2394 mumps->id.nnz = mumps->nnz; 2395 mumps->id.irn = mumps->irn; 2396 mumps->id.jcn = mumps->jcn; 2397 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2398 } 2399 break; 2400 case 3: /* distributed assembled matrix input (size>1) */ 2401 mumps->id.nnz_loc = mumps->nnz; 2402 mumps->id.irn_loc = mumps->irn; 2403 mumps->id.jcn_loc = mumps->jcn; 2404 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2405 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2406 PetscCall(MatCreateVecs(A, NULL, &b)); 2407 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2408 PetscCall(VecDestroy(&b)); 2409 } 2410 break; 2411 } 2412 PetscMUMPS_c(mumps); 2413 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2414 2415 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2416 F->ops->solve = MatSolve_MUMPS; 2417 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2418 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2419 2420 mumps->matstruc = SAME_NONZERO_PATTERN; 2421 PetscFunctionReturn(PETSC_SUCCESS); 2422 } 2423 2424 /* Note the Petsc r permutation and factor info are ignored */ 2425 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info) 2426 { 2427 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2428 Vec b; 2429 const PetscInt M = A->rmap->N; 2430 2431 PetscFunctionBegin; 2432 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2433 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2434 PetscFunctionReturn(PETSC_SUCCESS); 2435 } 2436 2437 /* Set MUMPS options from the options database */ 2438 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2439 2440 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2441 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2442 2443 /* analysis phase */ 2444 mumps->id.job = JOB_FACTSYMBOLIC; 2445 mumps->id.n = M; 2446 switch (mumps->id.ICNTL(18)) { 2447 case 0: /* centralized assembled matrix input */ 2448 if (!mumps->myid) { 2449 mumps->id.nnz = mumps->nnz; 2450 mumps->id.irn = mumps->irn; 2451 mumps->id.jcn = mumps->jcn; 2452 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2453 } 2454 break; 2455 case 3: /* distributed assembled matrix input (size>1) */ 2456 mumps->id.nnz_loc = mumps->nnz; 2457 mumps->id.irn_loc = mumps->irn; 2458 mumps->id.jcn_loc = mumps->jcn; 2459 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2460 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2461 PetscCall(MatCreateVecs(A, NULL, &b)); 2462 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2463 PetscCall(VecDestroy(&b)); 2464 } 2465 break; 2466 } 2467 PetscMUMPS_c(mumps); 2468 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2469 2470 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2471 F->ops->solve = MatSolve_MUMPS; 2472 F->ops->solvetranspose = MatSolve_MUMPS; 2473 F->ops->matsolve = MatMatSolve_MUMPS; 2474 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2475 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2476 #if defined(PETSC_USE_COMPLEX) 2477 F->ops->getinertia = NULL; 2478 #else 2479 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2480 #endif 2481 2482 mumps->matstruc = SAME_NONZERO_PATTERN; 2483 PetscFunctionReturn(PETSC_SUCCESS); 2484 } 2485 2486 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2487 { 2488 PetscBool iascii; 2489 PetscViewerFormat format; 2490 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2491 2492 PetscFunctionBegin; 2493 /* check if matrix is mumps type */ 2494 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2495 2496 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2497 if (iascii) { 2498 PetscCall(PetscViewerGetFormat(viewer, &format)); 2499 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2500 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2501 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2502 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2503 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2504 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2505 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2506 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2507 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2508 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2509 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2510 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2511 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2512 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2513 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2514 if (mumps->id.ICNTL(11) > 0) { 2515 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)mumps->id.RINFOG(4))); 2516 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)mumps->id.RINFOG(5))); 2517 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)mumps->id.RINFOG(6))); 2518 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)mumps->id.RINFOG(7), (double)mumps->id.RINFOG(8))); 2519 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", 
(double)mumps->id.RINFOG(9))); 2520 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)mumps->id.RINFOG(10), (double)mumps->id.RINFOG(11))); 2521 } 2522 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2523 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2524 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2525 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2526 /* ICNTL(15-17) not used */ 2527 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2528 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2529 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2530 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2531 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2532 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2533 2534 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2535 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2536 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2537 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2538 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2539 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2540 2541 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2542 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2543 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2544 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2545 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2546 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2547 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2548 2549 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)mumps->id.CNTL(1))); 2550 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)mumps->id.CNTL(2))); 2551 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)mumps->id.CNTL(3))); 2552 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)mumps->id.CNTL(4))); 2553 
PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)mumps->id.CNTL(5))); 2554 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)mumps->id.CNTL(7))); 2555 2556 /* information local to each processor */ 2557 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2558 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2559 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(1))); 2560 PetscCall(PetscViewerFlush(viewer)); 2561 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2562 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(2))); 2563 PetscCall(PetscViewerFlush(viewer)); 2564 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2565 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(3))); 2566 PetscCall(PetscViewerFlush(viewer)); 2567 2568 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n")); 2569 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2570 PetscCall(PetscViewerFlush(viewer)); 2571 2572 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n")); 2573 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2574 PetscCall(PetscViewerFlush(viewer)); 2575 2576 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (number of pivots eliminated on this processor after factorization):\n")); 2577 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2578 PetscCall(PetscViewerFlush(viewer)); 2579 2580 if (mumps->ninfo && mumps->ninfo <= 80) { 2581 PetscInt i; 2582 for (i = 0; i < mumps->ninfo; i++) { 2583 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2584 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2585 PetscCall(PetscViewerFlush(viewer)); 2586 } 2587 } 2588 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2589 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ?
((PetscObject)A)->prefix : "")); 2590 2591 if (mumps->myid == 0) { /* information from the host */ 2592 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)mumps->id.RINFOG(1))); 2593 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)mumps->id.RINFOG(2))); 2594 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)mumps->id.RINFOG(3))); 2595 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)mumps->id.RINFOG(12), (double)mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2596 2597 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2598 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2599 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2600 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2601 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2602 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2603 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2604 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2605 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2606 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2607 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2608 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14))); 2609 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2610 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2611 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2612 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2613 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2614 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the
factors): %d\n", mumps->id.INFOG(20))); 2615 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2616 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2617 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2618 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2619 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2620 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2621 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2622 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2623 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2624 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2625 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2626 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2627 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2628 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2629 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2630 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2631 } 2632 } 2633 } 2634 PetscFunctionReturn(PETSC_SUCCESS); 2635 } 2636 2637 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info) 2638 { 2639 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2640 2641 PetscFunctionBegin; 2642 info->block_size = 1.0; 2643 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2644 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2645 info->nz_unneeded = 0.0; 2646 info->assemblies = 0.0; 2647 info->mallocs = 0.0; 2648 info->memory = 0.0; 2649 info->fill_ratio_given = 0; 2650 info->fill_ratio_needed = 0; 2651 info->factor_mallocs = 0; 2652 PetscFunctionReturn(PETSC_SUCCESS); 2653 } 2654 2655 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2656 { 2657 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2658 const PetscScalar *arr; 2659 const PetscInt *idxs; 2660 PetscInt size, i; 2661 2662 PetscFunctionBegin; 2663 PetscCall(ISGetLocalSize(is, &size)); 2664 /* Schur complement matrix */ 2665 PetscCall(MatDestroy(&F->schur)); 2666 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2667 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2668 mumps->id.schur = (MumpsScalar *)arr; 2669 mumps->id.size_schur = size; 2670 mumps->id.schur_lld = size; 2671 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2672 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2673 2674 /* MUMPS expects Fortran-style indices */ 2675 PetscCall(PetscFree(mumps->id.listvar_schur)); 2676 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2677 PetscCall(ISGetIndices(is, &idxs)); 2678 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2679 PetscCall(ISRestoreIndices(is, &idxs)); 2680 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 2681 mumps->id.ICNTL(26) = -1; 2682 PetscFunctionReturn(PETSC_SUCCESS); 2683 } 2684 2685 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2686 { 2687 Mat St; 2688 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2689 PetscScalar *array; 2690 2691 PetscFunctionBegin; 2692 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected!
Call MatFactorSetSchurIS() to enable it"); 2693 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2694 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2695 PetscCall(MatSetType(St, MATDENSE)); 2696 PetscCall(MatSetUp(St)); 2697 PetscCall(MatDenseGetArray(St, &array)); 2698 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2699 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2700 PetscInt i, j, N = mumps->id.size_schur; 2701 for (i = 0; i < N; i++) { 2702 for (j = 0; j < N; j++) { 2703 #if !defined(PETSC_USE_COMPLEX) 2704 PetscScalar val = mumps->id.schur[i * N + j]; 2705 #else 2706 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2707 #endif 2708 array[j * N + i] = val; 2709 } 2710 } 2711 } else { /* stored by columns */ 2712 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2713 } 2714 } else { /* either full or lower-triangular (not packed) */ 2715 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2716 PetscInt i, j, N = mumps->id.size_schur; 2717 for (i = 0; i < N; i++) { 2718 for (j = i; j < N; j++) { 2719 #if !defined(PETSC_USE_COMPLEX) 2720 PetscScalar val = mumps->id.schur[i * N + j]; 2721 #else 2722 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2723 #endif 2724 array[i * N + j] = array[j * N + i] = val; 2725 } 2726 } 2727 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2728 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2729 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2730 PetscInt i, j, N = mumps->id.size_schur; 2731 for (i = 0; i < N; i++) { 2732 for (j = 0; j < i + 1; j++) { 2733 #if !defined(PETSC_USE_COMPLEX) 2734 PetscScalar val = mumps->id.schur[i * N + j]; 2735 #else 2736 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2737 #endif 2738 array[i * N + j] = array[j * N + i] = val; 2739 } 2740 } 2741 } 2742 } 2743 PetscCall(MatDenseRestoreArray(St, &array)); 2744 *S = St; 2745 PetscFunctionReturn(PETSC_SUCCESS); 2746 } 2747 2748 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2749 { 2750 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2751 2752 PetscFunctionBegin; 2753 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2754 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2755 for (i = 0; i < nICNTL_pre; ++i) 2756 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2757 if (i == nICNTL_pre) { /* not already cached */ 2758 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2759 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2760 mumps->ICNTL_pre[0]++; 2761 } 2762 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2763 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2764 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2765 PetscFunctionReturn(PETSC_SUCCESS); 2766 } 2767 2768 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2769 { 2770 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2771 2772 PetscFunctionBegin; 2773 if (mumps->id.job == JOB_NULL) { 2774 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 2775 *ival = 0; 2776 for (i = 0; i < nICNTL_pre; ++i) { 2777 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2778 } 2779 } else *ival = mumps->id.ICNTL(icntl); 2780 PetscFunctionReturn(PETSC_SUCCESS); 2781 } 2782 2783 /*@ 2784 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2785 2786 Logically Collective 2787 2788 Input Parameters: 2789 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2790 . icntl - index of MUMPS parameter array ICNTL() 2791 - ival - value of MUMPS ICNTL(icntl) 2792 2793 Options Database Key: 2794 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2795 2796 Level: beginner 2797 2798 References: 2799 . * - MUMPS Users' Guide 2800 2801 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2802 @*/ 2803 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2804 { 2805 PetscFunctionBegin; 2806 PetscValidType(F, 1); 2807 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2808 PetscValidLogicalCollectiveInt(F, icntl, 2); 2809 PetscValidLogicalCollectiveInt(F, ival, 3); 2810 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2811 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2812 PetscFunctionReturn(PETSC_SUCCESS); 2813 } 2814 2815 /*@ 2816 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2817 2818 Logically Collective 2819 2820 Input Parameters: 2821 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2822 - icntl - index of MUMPS parameter array ICNTL() 2823 2824 Output Parameter: 2825 . ival - value of MUMPS ICNTL(icntl) 2826 2827 Level: beginner 2828 2829 References: 2830 . * - MUMPS Users' Guide 2831 2832 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2833 @*/ 2834 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2835 { 2836 PetscFunctionBegin; 2837 PetscValidType(F, 1); 2838 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2839 PetscValidLogicalCollectiveInt(F, icntl, 2); 2840 PetscAssertPointer(ival, 3); 2841 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2842 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2843 PetscFunctionReturn(PETSC_SUCCESS); 2844 } 2845 2846 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2847 { 2848 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2849 2850 PetscFunctionBegin; 2851 if (mumps->id.job == JOB_NULL) { 2852 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 2853 for (i = 0; i < nCNTL_pre; ++i) 2854 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2855 if (i == nCNTL_pre) { 2856 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2857 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2858 mumps->CNTL_pre[0]++; 2859 } 2860 mumps->CNTL_pre[1 + 2 * i] = icntl; 2861 mumps->CNTL_pre[2 + 2 * i] = val; 2862 } else mumps->id.CNTL(icntl) = val; 2863 PetscFunctionReturn(PETSC_SUCCESS); 2864 } 2865 2866 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2867 { 2868 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2869 2870 PetscFunctionBegin; 2871 if (mumps->id.job == JOB_NULL) { 2872 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2873 *val = 0.0; 2874 for (i = 0; i < nCNTL_pre; ++i) { 2875 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2876 } 2877 } else *val = mumps->id.CNTL(icntl); 2878 PetscFunctionReturn(PETSC_SUCCESS); 2879 } 2880 2881 /*@ 2882 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2883 2884 Logically Collective 2885 2886 Input Parameters: 2887 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2888 . icntl - index of MUMPS parameter array CNTL() 2889 - val - value of MUMPS CNTL(icntl) 2890 2891 Options Database Key: 2892 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val 2893 2894 Level: beginner 2895 2896 References: 2897 . * - MUMPS Users' Guide 2898 2899 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2900 @*/ 2901 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2902 { 2903 PetscFunctionBegin; 2904 PetscValidType(F, 1); 2905 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2906 PetscValidLogicalCollectiveInt(F, icntl, 2); 2907 PetscValidLogicalCollectiveReal(F, val, 3); 2908 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2909 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2910 PetscFunctionReturn(PETSC_SUCCESS); 2911 } 2912 2913 /*@ 2914 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2915 2916 Logically Collective 2917 2918 Input Parameters: 2919 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2920 - icntl - index of MUMPS parameter array CNTL() 2921 2922 Output Parameter: 2923 . val - value of MUMPS CNTL(icntl) 2924 2925 Level: beginner 2926 2927 References: 2928 .
* - MUMPS Users' Guide 2929 2930 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2931 @*/ 2932 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2933 { 2934 PetscFunctionBegin; 2935 PetscValidType(F, 1); 2936 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2937 PetscValidLogicalCollectiveInt(F, icntl, 2); 2938 PetscAssertPointer(val, 3); 2939 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2940 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2941 PetscFunctionReturn(PETSC_SUCCESS); 2942 } 2943 2944 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2945 { 2946 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2947 2948 PetscFunctionBegin; 2949 *info = mumps->id.INFO(icntl); 2950 PetscFunctionReturn(PETSC_SUCCESS); 2951 } 2952 2953 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2954 { 2955 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2956 2957 PetscFunctionBegin; 2958 *infog = mumps->id.INFOG(icntl); 2959 PetscFunctionReturn(PETSC_SUCCESS); 2960 } 2961 2962 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2963 { 2964 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2965 2966 PetscFunctionBegin; 2967 *rinfo = mumps->id.RINFO(icntl); 2968 PetscFunctionReturn(PETSC_SUCCESS); 2969 } 2970 2971 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2972 { 2973 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2974 2975 PetscFunctionBegin; 2976 *rinfog = mumps->id.RINFOG(icntl); 2977 PetscFunctionReturn(PETSC_SUCCESS); 2978 } 2979 2980 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2981 { 2982 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2983 2984 PetscFunctionBegin; 2985 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2986 *size = 0; 2987 *array = NULL; 2988 if (!mumps->myid) { 2989 *size = mumps->id.INFOG(28); 2990 PetscCall(PetscMalloc1(*size, array)); 2991 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2992 } 2993 PetscFunctionReturn(PETSC_SUCCESS); 2994 } 2995 2996 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2997 { 2998 Mat Bt = NULL, Btseq = NULL; 2999 PetscBool flg; 3000 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3001 PetscScalar *aa; 3002 PetscInt spnr, *ia, *ja, M, nrhs; 3003 3004 PetscFunctionBegin; 3005 PetscAssertPointer(spRHS, 2); 3006 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 3007 if (flg) { 3008 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 3009 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 3010 3011 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 3012 3013 if (mumps->petsc_size > 1) { 3014 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 3015 Btseq = b->A; 3016 } else { 3017 Btseq = Bt; 3018 } 3019 3020 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 3021 mumps->id.nrhs = nrhs; 3022 mumps->id.lrhs = M; 3023 mumps->id.rhs = NULL; 3024 3025 if (!mumps->myid) { 3026 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 3027 
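/* On the host rank, hand the CSR structure of the sparse RHS to MUMPS: the shift of 1 passed to MatGetRowIJ() below yields the 1-based (Fortran-style) row pointers and column indices that MUMPS expects in irhs_ptr/irhs_sparse */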
PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3028 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3029 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3030 mumps->id.rhs_sparse = (MumpsScalar *)aa; 3031 } else { 3032 mumps->id.irhs_ptr = NULL; 3033 mumps->id.irhs_sparse = NULL; 3034 mumps->id.nz_rhs = 0; 3035 mumps->id.rhs_sparse = NULL; 3036 } 3037 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3038 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3039 3040 /* solve phase */ 3041 mumps->id.job = JOB_SOLVE; 3042 PetscMUMPS_c(mumps); 3043 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 3044 3045 if (!mumps->myid) { 3046 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3047 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3048 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3049 } 3050 PetscFunctionReturn(PETSC_SUCCESS); 3051 } 3052 3053 /*@ 3054 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 3055 3056 Logically Collective 3057 3058 Input Parameter: 3059 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3060 3061 Output Parameter: 3062 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3063 3064 Level: beginner 3065 3066 References: 3067 . * - MUMPS Users' Guide 3068 3069 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3070 @*/ 3071 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3072 { 3073 PetscFunctionBegin; 3074 PetscValidType(F, 1); 3075 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3076 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3077 PetscFunctionReturn(PETSC_SUCCESS); 3078 } 3079 3080 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3081 { 3082 Mat spRHS; 3083 3084 PetscFunctionBegin; 3085 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3086 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3087 PetscCall(MatDestroy(&spRHS)); 3088 PetscFunctionReturn(PETSC_SUCCESS); 3089 } 3090 3091 /*@ 3092 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 3093 3094 Logically Collective 3095 3096 Input Parameter: 3097 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3098 3099 Output Parameter: 3100 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3101 3102 Level: beginner 3103 3104 References: 3105 . 
* - MUMPS Users' Guide 3106 3107 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3108 @*/ 3109 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3110 { 3111 PetscBool flg; 3112 3113 PetscFunctionBegin; 3114 PetscValidType(F, 1); 3115 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3116 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3117 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3118 3119 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3120 PetscFunctionReturn(PETSC_SUCCESS); 3121 } 3122 3123 /*@ 3124 MatMumpsGetInfo - Get MUMPS parameter INFO() 3125 3126 Logically Collective 3127 3128 Input Parameters: 3129 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3130 - icntl - index of MUMPS parameter array INFO() 3131 3132 Output Parameter: 3133 . ival - value of MUMPS INFO(icntl) 3134 3135 Level: beginner 3136 3137 References: 3138 . * - MUMPS Users' Guide 3139 3140 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3141 @*/ 3142 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3143 { 3144 PetscFunctionBegin; 3145 PetscValidType(F, 1); 3146 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3147 PetscAssertPointer(ival, 3); 3148 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3149 PetscFunctionReturn(PETSC_SUCCESS); 3150 } 3151 3152 /*@ 3153 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3154 3155 Logically Collective 3156 3157 Input Parameters: 3158 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3159 - icntl - index of MUMPS parameter array INFOG() 3160 3161 Output Parameter: 3162 . ival - value of MUMPS INFOG(icntl) 3163 3164 Level: beginner 3165 3166 References: 3167 . * - MUMPS Users' Guide 3168 3169 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3170 @*/ 3171 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3172 { 3173 PetscFunctionBegin; 3174 PetscValidType(F, 1); 3175 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3176 PetscAssertPointer(ival, 3); 3177 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3178 PetscFunctionReturn(PETSC_SUCCESS); 3179 } 3180 3181 /*@ 3182 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3183 3184 Logically Collective 3185 3186 Input Parameters: 3187 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3188 - icntl - index of MUMPS parameter array RINFO() 3189 3190 Output Parameter: 3191 . val - value of MUMPS RINFO(icntl) 3192 3193 Level: beginner 3194 3195 References: 3196 . 
* - MUMPS Users' Guide 3197 3198 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3199 @*/ 3200 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3201 { 3202 PetscFunctionBegin; 3203 PetscValidType(F, 1); 3204 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3205 PetscAssertPointer(val, 3); 3206 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3207 PetscFunctionReturn(PETSC_SUCCESS); 3208 } 3209 3210 /*@ 3211 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3212 3213 Logically Collective 3214 3215 Input Parameters: 3216 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3217 - icntl - index of MUMPS parameter array RINFOG() 3218 3219 Output Parameter: 3220 . val - value of MUMPS RINFOG(icntl) 3221 3222 Level: beginner 3223 3224 References: 3225 . * - MUMPS Users' Guide 3226 3227 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3228 @*/ 3229 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3230 { 3231 PetscFunctionBegin; 3232 PetscValidType(F, 1); 3233 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3234 PetscAssertPointer(val, 3); 3235 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3236 PetscFunctionReturn(PETSC_SUCCESS); 3237 } 3238 3239 /*@ 3240 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3241 3242 Logically Collective 3243 3244 Input Parameter: 3245 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3246 3247 Output Parameters: 3248 + size - local size of the array. The size of the array is non-zero only on the host. 3249 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3250 for freeing this array. 3251 3252 Level: beginner 3253 3254 References: 3255 . * - MUMPS Users' Guide 3256 3257 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3258 @*/ 3259 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3260 { 3261 PetscFunctionBegin; 3262 PetscValidType(F, 1); 3263 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3264 PetscAssertPointer(size, 2); 3265 PetscAssertPointer(array, 3); 3266 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3267 PetscFunctionReturn(PETSC_SUCCESS); 3268 } 3269 3270 /*MC 3271 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3272 distributed and sequential matrices via the external package MUMPS. 
3273 3274 Works with `MATAIJ` and `MATSBAIJ` matrices 3275 3276 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3277 3278 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3279 See details below. 3280 3281 Use `-pc_type cholesky` or `-pc_type lu` with `-pc_factor_mat_solver_type mumps` to use this direct solver 3282 3283 Options Database Keys: 3284 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3285 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 3286 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3287 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3288 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 3289 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3290 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3291 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3292 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3293 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3294 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3295 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3296 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3297 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3298 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3299 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3300 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3301 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor 3302 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3303 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3304 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 3305 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 3306 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3307 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3308 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3309 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3310 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3311 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3312 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3313 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3314 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3315 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3316 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3317 .
-mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3318 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3319 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3320 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3321 If m is omitted, the default is typically the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual. 3322 3323 Level: beginner 3324 3325 Notes: 3326 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc), so using it on a Hermitian matrix will 3327 produce an error. 3328 3329 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3330 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3331 3332 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3333 the failure with 3334 .vb 3335 KSPGetPC(ksp,&pc); 3336 PCFactorGetMatrix(pc,&mat); 3337 MatMumpsGetInfo(mat,....); 3338 MatMumpsGetInfog(mat,....); etc. 3339 .ve 3340 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3341 3342 MUMPS provides 64-bit integer support in two build modes: 3343 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3344 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3345 3346 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3347 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and 3348 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3349 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3350 3351 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3352 3353 There are two modes to run MUMPS/PETSc with OpenMP 3354 .vb 3355 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3356 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3357 .ve 3358 3359 .vb 3360 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3361 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3362 .ve 3363 3364 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3365 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3366 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available).
Since MUMPS calls BLAS 3367 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci, or OpenBLAS 3368 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 3369 3370 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3371 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3372 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3373 are called slave ranks (or slaves). Only master ranks are seen by MUMPS; slaves are not. We free the CPUs assigned to slaves (whose binding might have been set 3374 by CPU binding policies in job scripts) and make those CPUs available to the master so that OMP threads spawned by MUMPS can run on them. 3375 In a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets and 3376 MPI ranks are interleaved across the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, with MPI ranks bound 3377 to cores. Then with -mat_mumps_use_omp_threads 16, a master rank (and the threads it spawns) will use half of its cores in socket 0 and half 3378 of its cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3379 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3380 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3381 examine the mapping result. 3382 3383 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3384 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3385 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3386 3387 References: 3388 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3389 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
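   Example Usage:
   A minimal sketch of selecting this solver programmatically through `KSP`/`PC`, equivalent to the command line options above; `A`, `b`, and `x` are assumed to be an already assembled matrix and vectors:
.vb
   KSP ksp;
   PC  pc;
   PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
   PetscCall(KSPSetOperators(ksp, A, A));
   PetscCall(KSPSetType(ksp, KSPPREONLY));                  /* use the factorization as a direct solver */
   PetscCall(KSPGetPC(ksp, &pc));
   PetscCall(PCSetType(pc, PCLU));                          /* or PCCHOLESKY for symmetric problems */
   PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS)); /* select MUMPS as the factorization package */
   PetscCall(KSPSetFromOptions(ksp));                       /* allow command line customization, e.g. -mat_mumps_icntl_4 2 */
   PetscCall(KSPSolve(ksp, b, x));
.ve
   One can also call `PCFactorSetUpMatSolverType()` followed by `PCFactorGetMatrix()` before the solve to obtain the factor matrix and set MUMPS parameters on it directly with `MatMumpsSetIcntl()` and `MatMumpsSetCntl()`.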
3390 3391 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3392 M*/ 3393 3394 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 3395 { 3396 PetscFunctionBegin; 3397 *type = MATSOLVERMUMPS; 3398 PetscFunctionReturn(PETSC_SUCCESS); 3399 } 3400 3401 /* MatGetFactor for Seq and MPI AIJ matrices */ 3402 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3403 { 3404 Mat B; 3405 Mat_MUMPS *mumps; 3406 PetscBool isSeqAIJ, isDiag, isDense; 3407 PetscMPIInt size; 3408 3409 PetscFunctionBegin; 3410 #if defined(PETSC_USE_COMPLEX) 3411 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3412 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3413 *F = NULL; 3414 PetscFunctionReturn(PETSC_SUCCESS); 3415 } 3416 #endif 3417 /* Create the factorization matrix */ 3418 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3419 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3420 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3421 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3422 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3423 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3424 PetscCall(MatSetUp(B)); 3425 3426 PetscCall(PetscNew(&mumps)); 3427 3428 B->ops->view = MatView_MUMPS; 3429 B->ops->getinfo = MatGetInfo_MUMPS; 3430 3431 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3432 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3433 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3434 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3435 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3436 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3437 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3438 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3439 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3440 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3441 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3442 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3443 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3444 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3445 3446 if (ftype == MAT_FACTOR_LU) { 3447 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3448 B->factortype = MAT_FACTOR_LU; 3449 if (isSeqAIJ) 
mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3450 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3451 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3452 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3453 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3454 mumps->sym = 0; 3455 } else { 3456 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3457 B->factortype = MAT_FACTOR_CHOLESKY; 3458 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3459 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3460 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3461 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3462 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3463 #if defined(PETSC_USE_COMPLEX) 3464 mumps->sym = 2; 3465 #else 3466 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3467 else mumps->sym = 2; 3468 #endif 3469 } 3470 3471 /* set solvertype */ 3472 PetscCall(PetscFree(B->solvertype)); 3473 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3474 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3475 if (size == 1) { 3476 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3477 B->canuseordering = PETSC_TRUE; 3478 } 3479 B->ops->destroy = MatDestroy_MUMPS; 3480 B->data = (void *)mumps; 3481 3482 *F = B; 3483 mumps->id.job = JOB_NULL; 3484 mumps->ICNTL_pre = NULL; 3485 mumps->CNTL_pre = NULL; 3486 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3487 PetscFunctionReturn(PETSC_SUCCESS); 3488 } 3489 3490 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3491 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 3492 { 3493 Mat B; 3494 Mat_MUMPS *mumps; 3495 PetscBool isSeqSBAIJ; 3496 PetscMPIInt size; 3497 3498 PetscFunctionBegin; 3499 #if defined(PETSC_USE_COMPLEX) 3500 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3501 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3502 *F = NULL; 3503 PetscFunctionReturn(PETSC_SUCCESS); 3504 } 3505 #endif 3506 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3507 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3508 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3509 PetscCall(MatSetUp(B)); 3510 3511 PetscCall(PetscNew(&mumps)); 3512 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3513 if (isSeqSBAIJ) { 3514 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3515 } else { 3516 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3517 } 3518 3519 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3520 B->ops->view = MatView_MUMPS; 3521 B->ops->getinfo = MatGetInfo_MUMPS; 3522 3523 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3524 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3525 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3526 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3527 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3528 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3529 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3530 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3531 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3532 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3533 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3534 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3535 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3536 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3537 3538 B->factortype = MAT_FACTOR_CHOLESKY; 3539 #if defined(PETSC_USE_COMPLEX) 3540 mumps->sym = 2; 3541 #else 3542 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3543 else mumps->sym = 2; 3544 #endif 3545 3546 /* set solvertype */ 3547 PetscCall(PetscFree(B->solvertype)); 3548 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3549 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3550 if (size == 1) { 3551 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3552 B->canuseordering = PETSC_TRUE; 3553 } 3554 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3555 B->ops->destroy = MatDestroy_MUMPS; 3556 B->data = (void *)mumps; 3557 3558 *F = B; 3559 mumps->id.job = JOB_NULL; 3560 mumps->ICNTL_pre = NULL; 3561 mumps->CNTL_pre = NULL; 3562 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3563 PetscFunctionReturn(PETSC_SUCCESS); 3564 } 3565 3566 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3567 { 3568 Mat B; 3569 Mat_MUMPS *mumps; 3570 PetscBool isSeqBAIJ; 3571 PetscMPIInt 
size; 3572 3573 PetscFunctionBegin; 3574 /* Create the factorization matrix */ 3575 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3576 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3577 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3578 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3579 PetscCall(MatSetUp(B)); 3580 3581 PetscCall(PetscNew(&mumps)); 3582 if (ftype == MAT_FACTOR_LU) { 3583 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3584 B->factortype = MAT_FACTOR_LU; 3585 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3586 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3587 mumps->sym = 0; 3588 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3589 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3590 3591 B->ops->view = MatView_MUMPS; 3592 B->ops->getinfo = MatGetInfo_MUMPS; 3593 3594 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3595 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3596 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3597 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3598 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3599 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3600 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3601 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3602 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3603 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3604 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3605 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3606 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3607 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3608 3609 /* set solvertype */ 3610 PetscCall(PetscFree(B->solvertype)); 3611 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3612 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3613 if (size == 1) { 3614 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3615 B->canuseordering = PETSC_TRUE; 3616 } 3617 B->ops->destroy = MatDestroy_MUMPS; 3618 B->data = (void *)mumps; 3619 3620 *F = B; 3621 mumps->id.job = JOB_NULL; 3622 mumps->ICNTL_pre = NULL; 3623 mumps->CNTL_pre = NULL; 3624 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3625 PetscFunctionReturn(PETSC_SUCCESS); 3626 } 3627 3628 /* MatGetFactor for Seq and MPI SELL matrices */ 3629 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3630 { 3631 Mat B; 3632 Mat_MUMPS *mumps; 3633 PetscBool isSeqSELL; 3634 
PetscMPIInt size; 3635 3636 PetscFunctionBegin; 3637 /* Create the factorization matrix */ 3638 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3639 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3640 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3641 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3642 PetscCall(MatSetUp(B)); 3643 3644 PetscCall(PetscNew(&mumps)); 3645 3646 B->ops->view = MatView_MUMPS; 3647 B->ops->getinfo = MatGetInfo_MUMPS; 3648 3649 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3650 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3651 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3652 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3653 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3654 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3655 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3656 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3657 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3658 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3659 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3660 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3661 3662 if (ftype == MAT_FACTOR_LU) { 3663 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3664 B->factortype = MAT_FACTOR_LU; 3665 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3666 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3667 mumps->sym = 0; 3668 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3669 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3670 3671 /* set solvertype */ 3672 PetscCall(PetscFree(B->solvertype)); 3673 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3674 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3675 if (size == 1) { 3676 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3677 B->canuseordering = PETSC_TRUE; 3678 } 3679 B->ops->destroy = MatDestroy_MUMPS; 3680 B->data = (void *)mumps; 3681 3682 *F = B; 3683 mumps->id.job = JOB_NULL; 3684 mumps->ICNTL_pre = NULL; 3685 mumps->CNTL_pre = NULL; 3686 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3687 PetscFunctionReturn(PETSC_SUCCESS); 3688 } 3689 3690 /* MatGetFactor for MATNEST matrices */ 3691 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3692 { 3693 Mat B, **mats; 3694 Mat_MUMPS *mumps; 3695 PetscInt nr, nc; 3696 PetscMPIInt size; 3697 PetscBool flg = PETSC_TRUE; 3698 3699 PetscFunctionBegin; 3700 #if defined(PETSC_USE_COMPLEX) 3701 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3702 PetscCall(PetscInfo(A, "Hermitian 
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif

  /* Return if some condition is not satisfied */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag, isDense;

      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
      if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
      else {
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
      }
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
            PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
          PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
        PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym               = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
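
/*
   Hedged usage sketch (illustration only, not part of the interface above): for MAT_FACTOR_CHOLESKY
   the checks above require a square nest whose per-block row and column index sets match, whose row
   maps do not intersect, and whose blocks are (S)(B)AIJ, diagonal, or dense. A layout that passes
   those checks could look like the following, where A00, A01 and A11 are hypothetical names for
   already-assembled symmetric blocks; only the upper triangle of the nest needs to be provided.

     Mat blocks[4] = {A00, A01, NULL, A11}; // lower-left block left NULL; Cholesky reads c >= r only
     Mat Anest, F;
     PetscCall(MatCreateNest(PETSC_COMM_WORLD, 2, NULL, 2, NULL, blocks, &Anest));
     PetscCall(MatSetOption(Anest, MAT_SYMMETRIC, PETSC_TRUE));
     PetscCall(MatGetFactor(Anest, MATSOLVERMUMPS, MAT_FACTOR_CHOLESKY, &F));
     // F is then factored and used through the usual symbolic/numeric/solve path or via KSP/PC.
*/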

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
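
/*
   Hedged usage sketch (illustration only): once the matrix-type/factor-type pairings above are
   registered, a user typically selects MUMPS either from the command line, e.g.

     -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30

   or programmatically through a PC; here ksp is a placeholder for an existing KSP whose operator
   has already been set with KSPSetOperators().

     Mat F;
     PC  pc;
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
     PetscCall(PCFactorSetUpMatSolverType(pc)); // create the MUMPS factor matrix so its options can be set
     PetscCall(PCFactorGetMatrix(pc, &F));
     PetscCall(MatMumpsSetIcntl(F, 14, 30)); // e.g. increase the working-space percentage (ICNTL(14))
*/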