/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

/* Pointer to the relevant section of the MUMPS users manual; appended to error messages below */
#define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

/* MUMPS ships precision/scalar-specific headers and entry points; pick the one matching PETSc's scalar type */
EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
/* values of the MUMPS JOB parameter selecting the phase to run */
#define JOB_INIT -1
#define JOB_NULL 0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

/* since full 64-bit MUMPS is ruled out above, PetscMUMPSInt is a 32-bit int */
#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  /* range check only needed when PetscInt/PetscCount may exceed 32 bits */
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)a;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;

  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
      } \
      PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
    } else { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { \
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
    PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
    PetscCall(PetscFPTrapPop()); \
  } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I) icntl[(I) - 1]
#define CNTL(I) cntl[(I) - 1]
#define INFOG(I) infog[(I) - 1]
#define INFO(I) info[(I) - 1]
#define RINFOG(I) rinfog[(I) - 1]
#define RINFO(I) rinfo[(I) - 1]

/* Private data for the MUMPS MatSolverType; hangs off Mat->data of the factor matrix */
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscCount  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
  PetscCount  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt
   to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    /* with 64-bit PetscInt a narrowing copy into the cached 32-bit work arrays is required */
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  /* 32-bit PetscInt matches PetscMUMPSInt, so the input arrays can be passed through unchanged */
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the Schur-complement related arrays attached to the MUMPS id and disable ICNTL(19) */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  /* B wraps id.redrhs without copying; the solve result is written back into that storage */
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X aliases the same redrhs buffer as B, so the triangular solve is effectively in place */
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds inv(Schur); solving reduces to a dense multiply into a separate buffer, then copy back */
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); /* result lands back in id.redrhs via B */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Run either the condensation (expansion == PETSC_FALSE) or the expansion step of a MUMPS solve
   with a Schur complement; no-op unless ICNTL(19) has enabled Schur support */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij,baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX: only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/

static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscCount         nz, rnz, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    /* expand the CSR row pointer into explicit per-entry row indices */
    for (PetscCount i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (PetscCount j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->val = (PetscScalar *)av; /* AIJ values are already in triple order; no copy needed */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscCount     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices];
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    /* SELL stores entries slice-by-slice; r cycles 0..7 over the 8 rows of each slice */
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
  else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatGetBlockSize(A, &bs));
    M  = A->rmap->N / bs;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    /* scatter each bs x bs block into bs2 scalar (i,j) entries, column-major within the block */
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 449 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 450 idx++; 451 } 452 } 453 } 454 } 455 mumps->irn = row; 456 mumps->jcn = col; 457 mumps->nnz = nz; 458 mumps->val = aa->a; 459 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 460 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 461 PetscFunctionReturn(PETSC_SUCCESS); 462 } 463 464 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 465 { 466 const PetscInt *ai, *aj, *ajj; 467 PetscInt bs; 468 PetscCount nz, rnz, i, j, k, m; 469 PetscMUMPSInt *row, *col; 470 PetscScalar *val; 471 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 472 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 473 #if defined(PETSC_USE_COMPLEX) 474 PetscBool isset, hermitian; 475 #endif 476 477 PetscFunctionBegin; 478 #if defined(PETSC_USE_COMPLEX) 479 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 480 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 481 #endif 482 ai = aa->i; 483 aj = aa->j; 484 PetscCall(MatGetBlockSize(A, &bs)); 485 if (reuse == MAT_INITIAL_MATRIX) { 486 const PetscCount alloc_size = aa->nz * bs2; 487 488 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 489 if (bs > 1) { 490 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 491 mumps->val = mumps->val_alloc; 492 } else { 493 mumps->val = aa->a; 494 } 495 mumps->irn = row; 496 mumps->jcn = col; 497 } else { 498 row = mumps->irn; 499 col = mumps->jcn; 500 } 501 val = mumps->val; 502 503 nz = 0; 504 if (bs > 1) { 505 for (i = 0; i < mbs; i++) { 506 rnz = ai[i + 1] - ai[i]; 
507 ajj = aj + ai[i]; 508 for (j = 0; j < rnz; j++) { 509 for (k = 0; k < bs; k++) { 510 for (m = 0; m < bs; m++) { 511 if (ajj[j] > i || k >= m) { 512 if (reuse == MAT_INITIAL_MATRIX) { 513 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 514 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 515 } 516 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 517 } 518 } 519 } 520 } 521 } 522 } else if (reuse == MAT_INITIAL_MATRIX) { 523 for (i = 0; i < mbs; i++) { 524 rnz = ai[i + 1] - ai[i]; 525 ajj = aj + ai[i]; 526 for (j = 0; j < rnz; j++) { 527 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 528 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 529 nz++; 530 } 531 } 532 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz); 533 } else if (mumps->nest_vals) 534 PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 535 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 536 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 537 PetscFunctionReturn(PETSC_SUCCESS); 538 } 539 540 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 541 { 542 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 543 PetscCount nz, rnz, i, j; 544 const PetscScalar *av, *v1; 545 PetscScalar *val; 546 PetscMUMPSInt *row, *col; 547 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 548 PetscBool missing; 549 #if defined(PETSC_USE_COMPLEX) 550 PetscBool hermitian, isset; 551 #endif 552 553 PetscFunctionBegin; 554 #if defined(PETSC_USE_COMPLEX) 555 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 556 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), 
PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 557 #endif 558 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 559 ai = aa->i; 560 aj = aa->j; 561 adiag = aa->diag; 562 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 563 if (reuse == MAT_INITIAL_MATRIX) { 564 /* count nz in the upper triangular part of A */ 565 nz = 0; 566 if (missing) { 567 for (i = 0; i < M; i++) { 568 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 569 for (j = ai[i]; j < ai[i + 1]; j++) { 570 if (aj[j] < i) continue; 571 nz++; 572 } 573 } else { 574 nz += ai[i + 1] - adiag[i]; 575 } 576 } 577 } else { 578 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 579 } 580 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 581 PetscCall(PetscMalloc1(nz, &val)); 582 mumps->nnz = nz; 583 mumps->irn = row; 584 mumps->jcn = col; 585 mumps->val = mumps->val_alloc = val; 586 587 nz = 0; 588 if (missing) { 589 for (i = 0; i < M; i++) { 590 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 591 for (j = ai[i]; j < ai[i + 1]; j++) { 592 if (aj[j] < i) continue; 593 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 594 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 595 val[nz] = av[j]; 596 nz++; 597 } 598 } else { 599 rnz = ai[i + 1] - adiag[i]; 600 ajj = aj + adiag[i]; 601 v1 = av + adiag[i]; 602 for (j = 0; j < rnz; j++) { 603 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 604 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 605 val[nz++] = v1[j]; 606 } 607 } 608 } 609 } else { 610 for (i = 0; i < M; i++) { 611 rnz = ai[i + 1] - adiag[i]; 612 ajj = aj + adiag[i]; 613 v1 = av + adiag[i]; 614 for (j = 0; j < rnz; j++) { 615 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 616 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 617 val[nz++] = v1[j]; 618 } 619 } 620 } 621 } else { 622 nz = 0; 623 val = mumps->val; 624 if (missing) { 625 for (i = 0; i < M; i++) { 626 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 627 for (j = ai[i]; j < ai[i + 1]; j++) { 628 if (aj[j] 
< i) continue; 629 val[nz++] = av[j]; 630 } 631 } else { 632 rnz = ai[i + 1] - adiag[i]; 633 v1 = av + adiag[i]; 634 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 635 } 636 } 637 } else { 638 for (i = 0; i < M; i++) { 639 rnz = ai[i + 1] - adiag[i]; 640 v1 = av + adiag[i]; 641 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 642 } 643 } 644 } 645 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 646 PetscFunctionReturn(PETSC_SUCCESS); 647 } 648 649 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 650 { 651 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 652 PetscInt bs; 653 PetscCount rstart, nz, i, j, k, m, jj, irow, countA, countB; 654 PetscMUMPSInt *row, *col; 655 const PetscScalar *av, *bv, *v1, *v2; 656 PetscScalar *val; 657 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 658 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data; 659 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 660 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 661 #if defined(PETSC_USE_COMPLEX) 662 PetscBool hermitian, isset; 663 #endif 664 665 PetscFunctionBegin; 666 #if defined(PETSC_USE_COMPLEX) 667 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 668 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 669 #endif 670 PetscCall(MatGetBlockSize(A, &bs)); 671 rstart = A->rmap->rstart; 672 ai = aa->i; 673 aj = aa->j; 674 bi = bb->i; 675 bj = bb->j; 676 av = aa->a; 677 bv = bb->a; 678 679 garray = mat->garray; 680 681 if (reuse == MAT_INITIAL_MATRIX) { 682 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 683 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 684 PetscCall(PetscMalloc1(nz, &val)); 685 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 686 mumps->irn = row; 687 mumps->jcn = col; 688 mumps->val = mumps->val_alloc = val; 689 } else { 690 val = mumps->val; 691 } 692 693 jj = 0; 694 irow = rstart; 
  /* walk block rows of both the diagonal (A) and off-diagonal (B) parts, emitting global indices */
  for (i = 0; i < mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i] * bs2;
    v2     = bv + bi[i] * bs2;

    if (bs > 1) {
      /* A-part */
      for (j = 0; j < countA; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (rstart + ajj[j] * bs > irow || k >= m) { /* keep only the upper triangle of diagonal blocks */
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs];
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
            }
            val[jj++] = v2[j * bs2 + m + k * bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j = 0; j < countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; /* exact count known only after the triangular filtering */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert an MPIAIJ matrix (diagonal part Ad plus off-diagonal part Ao) to triples with
   global row/column indices */
static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)Ad->data;
  bb = (Mat_SeqAIJ *)Ao->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;
  cstart = A->cmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part: garray maps local off-diagonal columns to global columns */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert an MPIBAIJ matrix to triples, scattering each bs x bs block into bs2 scalar entries
   with global indices */
static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS
*mumps)
{
  const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* complex Hermitian matrices are not supported by this conversion; reject them up front */
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa    = (Mat_SeqAIJ *)Ad->data;
  bb    = (Mat_SeqAIJ *)Ao->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag; /* adiag[i] points at the diagonal entry of row i, i.e. the start of its upper triangle */
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    /* first pass: count the upper triangular entries so exactly nz triples are allocated */
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i = 0; i < m; i++) {
      nza += (ai[i + 1] - adiag[i]);
      countB = bi[i + 1] - bi[i];
      bjj    = bj + bi[i];
      for (j = 0; j < countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of upper triangular part of mat */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i + 1] - adiag[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part: entries from the diagonal onward are already upper triangular */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part: keep only entries whose global column lies in the upper triangle (same filter as the counting pass) */
    for (j = 0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a MATDIAGONAL matrix to COO triples: one entry per local row, row index == column index.
   <shift> is unused; callers (e.g. the MATNEST path) apply any index shift when assembling the final COO. */
static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt     M = A->rmap->n;
  PetscCount         i;
  PetscMUMPSInt     *row, *col;
  Vec                v;

  PetscFunctionBegin;
  PetscCall(MatDiagonalGetDiagonal(A, &v));
  PetscCall(VecGetArrayRead(v, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(M, &row, M, &col));
    for (i = 0; i < M; i++) {
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
      col[i] = row[i];
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = M;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(VecRestoreArrayRead(v, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a dense matrix to COO triples in column-major order (m local rows times N global columns).
   Values are used in place when the leading dimension equals m, otherwise they are compacted into a
   contiguous buffer. <shift> is unused here for the same reason as in the diagonal conversion above. */
static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscScalar   *v;
  const PetscInt m = A->rmap->n, N = A->cmap->N;
  PetscInt       lda;
  PetscCount     i, j;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(A, &lda));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
    /* first column: row indices are the owned global rows, column index 0 */
    for (i = 0; i < m; i++) {
      col[i] = 0;
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
    }
    /* remaining columns reuse the row pattern of the previous column */
    for (j = 1; j < N; j++) {
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
      PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
    }
    if (lda == m) mumps->val = v; /* storage already contiguous, use it directly */
    else {
      PetscCall(PetscMalloc1(m * N, &mumps->val));
      mumps->val_alloc = mumps->val;
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = m * N;
  } else {
    if (lda == m && !mumps->nest_vals) mumps->val = v;
    else {
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a MATNEST matrix to COO triples by converting each sub-block and merging the results
   into one global COO, remapping block-local indices through the nest's row/column index sets */
static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ?
PETSC_TRUE : PETSC_FALSE; /* nonzero sym means a symmetric (Cholesky-type) factorization */

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscCount     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    /* first sweep: pick a conversion routine per sub-block and estimate the total nonzero count */
    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL; /* NULL marks a block to skip */
        if (chol && c < r) continue;                       /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag, isDense;
          MatInfo   info;

          /* unwrap (Hermitian-)transpose wrappers and remember that rows/columns must be swapped later */
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
          if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
          else {
            PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
            if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
          }
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
          PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

          /* for Cholesky only diagonal blocks are converted to their symmetric (sbaij-style) form;
             off-diagonal blocks and the non-symmetric case use the full conversions */
          if (chol) {
            if (r == c) {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            } else {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    /* second sweep: convert every selected block and splice its triples into the global COO */
    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub  = mats[r][c];
        const PetscInt *ridx = rows_idx[r];
        const PetscInt *cidx = cols_idx[c];
        PetscInt        rst;
        PetscSF         csf;
        PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
        PetscLayout     cmap;
        PetscInt        innz;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        swap = (PetscBool)(isTrans || isHTrans);

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscIntCast(mumps->nnz, &innz));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
        if (isHTrans) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]);
        }

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val = vals;
    mumps->irn = irns;
    mumps->jcn = jcns;
    mumps->nnz = cumnnz;
  } else {
    /* reuse: refresh each block's values in place at its recorded offset in nest_vals */
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool isTrans, isHTrans = PETSC_FALSE;
        Mat       sub  = mats[r][c];
        PetscInt  midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (isHTrans)
{ /* conjugate the refreshed values of a Hermitian-transposed block */
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
        }
      }
    }
    mumps->val = oval;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release all resources owned by the MUMPS factor matrix: scratch arrays, scatters, sequential
   work vectors, the MUMPS instance itself (JOB_END), optional OpenMP support buffers, and the
   composed query/set functions */
static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      /* the communicator was either duplicated (OpenMP support path) or checked out from the PETSc pool */
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor.
*/
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* no OpenMP aggregation: each rank hands its own rows directly to MUMPS */
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = (PetscMUMPSInt)m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar *)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    /* several MPI ranks share one MUMPS process; gather their RHS pieces onto the omp master */
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        /* translate omp-communicator ranks to petsc-communicator ranks to look up their row ranges */
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf.
Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst                    = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
  mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A x = b with the computed MUMPS factors, handling distributed or centralized RHS and the
   optional Schur-complement condensation/expansion steps */
static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
                                   "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
                                   &cite2));

  /* if the factorization failed, flag x and bail out instead of calling MUMPS */
  PetscCall(VecFlag(x, A->factorerrortype));
  if (A->factorerrortype) {
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b, &rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq, &array));
        mumps->id.rhs = (MumpsScalar *)array;
      }
    }
  } else { /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    PetscCall(VecCopy(b, x)); /* solve in place: x starts as a copy of b and is overwritten by MUMPS */
    PetscCall(VecGetArray(x, &array));
    mumps->id.rhs = (MumpsScalar *)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
      mumps->id.ICNTL(26) = 1; /* condensation phase */
    } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }
  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
  else if (mumps->id.ICNTL(26) == 1) {
    PetscCall(MatMumpsSolveSchur_Private(A));
    /* scatter the condensed (Schur) part of the solution back into the full solution array;
       listvar_schur[] holds 1-based indices */
    for (i = 0; i < mumps->id.size_schur; ++i) {
#if !defined(PETSC_USE_COMPLEX)
      PetscScalar val = mumps->id.redrhs[i];
#else
      PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i;
#endif
      array[mumps->id.listvar_schur[i] - 1] = val;
    }
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
  }

  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      PetscCall(VecRestoreArrayRead(b, &rarray));
    } else if (!mumps->myid) {
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else PetscCall(VecRestoreArray(x, &array));

  PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ?
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); /* negative INFO(28) encodes the count in millions, per MUMPS conventions */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A^T x = b by temporarily setting ICNTL(9) (MUMPS solve-transpose control, where a value
   other than 1 requests the transposed system) and delegating to MatSolve_MUMPS() */
static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
  const PetscMUMPSInt value = mumps->id.ICNTL(9);

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatSolve_MUMPS(A, b, x));
  mumps->id.ICNTL(9) = value; /* restore the previous (forward-solve) setting */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A X = B for multiple right-hand sides; B may be dense or a MATTRANSPOSEVIRTUAL wrapper of
   a sparse matrix (MUMPS wants sparse RHS in compressed-column form), X must be dense */
static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL;
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M, nrhsM;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save;
  PetscScalar       *bray, *sol_loc, *sol_loc_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc;
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
    if (flgT) { /* input B is transpose of actual RHS
matrix, 1592 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1593 PetscCall(MatTransposeGetMat(B, &Bt)); 1594 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1595 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1596 } 1597 1598 PetscCall(MatGetSize(B, &M, &nrhs)); 1599 PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); 1600 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 1601 mumps->id.lrhs = (PetscMUMPSInt)M; 1602 mumps->id.rhs = NULL; 1603 1604 if (mumps->petsc_size == 1) { 1605 PetscScalar *aa; 1606 PetscInt spnr, *ia, *ja; 1607 PetscBool second_solve = PETSC_FALSE; 1608 1609 PetscCall(MatDenseGetArray(X, &array)); 1610 mumps->id.rhs = (MumpsScalar *)array; 1611 1612 if (denseB) { 1613 /* copy B to X */ 1614 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1615 PetscCall(PetscArraycpy(array, rbray, nrhsM)); 1616 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1617 } else { /* sparse B */ 1618 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1619 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1620 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1621 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1622 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1623 } 1624 /* handle condensation step of Schur complement (if any) */ 1625 if (mumps->id.size_schur > 0) { 1626 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1627 second_solve = PETSC_TRUE; 1628 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1629 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1630 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1631 } 1632 /* solve phase */ 1633 mumps->id.job = JOB_SOLVE; 1634 PetscMUMPS_c(mumps); 1635 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, 
"MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1636 1637 /* handle expansion step of Schur complement (if any) */ 1638 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1639 else if (mumps->id.ICNTL(26) == 1) { 1640 PetscCall(MatMumpsSolveSchur_Private(A)); 1641 for (j = 0; j < nrhs; ++j) 1642 for (i = 0; i < mumps->id.size_schur; ++i) { 1643 #if !defined(PETSC_USE_COMPLEX) 1644 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1645 #else 1646 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1647 #endif 1648 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1649 } 1650 } 1651 if (!denseB) { /* sparse B */ 1652 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1653 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1654 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1655 } 1656 PetscCall(MatDenseRestoreArray(X, &array)); 1657 PetscFunctionReturn(PETSC_SUCCESS); 1658 } 1659 1660 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
*/ 1661 PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1662 1663 /* create msol_loc to hold mumps local solution */ 1664 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1665 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1666 1667 lsol_loc = mumps->id.lsol_loc; 1668 PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */ 1669 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1670 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1671 mumps->id.isol_loc = isol_loc; 1672 1673 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1674 1675 if (denseB) { 1676 if (mumps->ICNTL20 == 10) { 1677 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1678 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1679 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1680 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1681 PetscCall(MatGetLocalSize(B, &m, NULL)); 1682 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); 1683 } else { 1684 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1685 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1686 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1687 0, re-arrange B into desired order, which is a local operation. 
1688 */ 1689 1690 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1691 /* wrap dense rhs matrix B into a vector v_mpi */ 1692 PetscCall(MatGetLocalSize(B, &m, NULL)); 1693 PetscCall(MatDenseGetArray(B, &bray)); 1694 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, (const PetscScalar *)bray, &v_mpi)); 1695 PetscCall(MatDenseRestoreArray(B, &bray)); 1696 1697 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1698 if (!mumps->myid) { 1699 PetscInt *idx; 1700 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1701 PetscCall(PetscMalloc1(nrhsM, &idx)); 1702 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1703 for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) { 1704 for (j = 0; j < nrhs; j++) { 1705 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1706 } 1707 } 1708 1709 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq)); 1710 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to)); 1711 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from)); 1712 } else { 1713 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1714 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1715 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1716 } 1717 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1718 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1719 PetscCall(ISDestroy(&is_to)); 1720 PetscCall(ISDestroy(&is_from)); 1721 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1722 1723 if (!mumps->myid) { /* define rhs on the host */ 1724 PetscCall(VecGetArray(b_seq, &bray)); 1725 mumps->id.rhs = (MumpsScalar *)bray; 1726 PetscCall(VecRestoreArray(b_seq, &bray)); 1727 } 1728 } 1729 } else { /* sparse B */ 1730 b = (Mat_MPIAIJ *)Bt->data; 1731 1732 /* wrap dense X into a vector 
v_mpi */ 1733 PetscCall(MatGetLocalSize(X, &m, NULL)); 1734 PetscCall(MatDenseGetArray(X, &bray)); 1735 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, (const PetscScalar *)bray, &v_mpi)); 1736 PetscCall(MatDenseRestoreArray(X, &bray)); 1737 1738 if (!mumps->myid) { 1739 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1740 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1741 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1742 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1743 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1744 } else { 1745 mumps->id.irhs_ptr = NULL; 1746 mumps->id.irhs_sparse = NULL; 1747 mumps->id.nz_rhs = 0; 1748 mumps->id.rhs_sparse = NULL; 1749 } 1750 } 1751 1752 /* solve phase */ 1753 mumps->id.job = JOB_SOLVE; 1754 PetscMUMPS_c(mumps); 1755 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1756 1757 /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */ 1758 PetscCall(MatDenseGetArray(X, &array)); 1759 PetscCall(VecPlaceArray(v_mpi, array)); 1760 1761 /* create scatter scat_sol */ 1762 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1763 /* iidx: index for scatter mumps solution to PETSc X */ 1764 1765 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1766 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1767 for (i = 0; i < lsol_loc; i++) { 1768 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1769 1770 for (proc = 0; proc < mumps->petsc_size; proc++) { 1771 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1772 myrstart = rstart[proc]; 1773 k = isol_loc[i] - myrstart; /* local index on 1st column of PETSc vector X */ 1774 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to PETSc index in X */ 1775 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1776 break; 1777 } 1778 } 1779 1780 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1781 } 1782 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1783 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1784 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1785 PetscCall(ISDestroy(&is_from)); 1786 PetscCall(ISDestroy(&is_to)); 1787 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1788 PetscCall(MatDenseRestoreArray(X, &array)); 1789 1790 /* free spaces */ 1791 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1792 mumps->id.isol_loc = isol_loc_save; 1793 1794 PetscCall(PetscFree2(sol_loc, isol_loc)); 1795 PetscCall(PetscFree(idxx)); 1796 PetscCall(VecDestroy(&msol_loc)); 1797 PetscCall(VecDestroy(&v_mpi)); 1798 if (!denseB) { 1799 if (!mumps->myid) { 1800 b = (Mat_MPIAIJ *)Bt->data; 1801 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1802 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1803 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1804 } 1805 } else { 1806 if (mumps->ICNTL20 == 0) { 1807 PetscCall(VecDestroy(&b_seq)); 1808 PetscCall(VecScatterDestroy(&scat_rhs)); 1809 } 1810 } 1811 PetscCall(VecScatterDestroy(&scat_sol)); 1812 PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1813 PetscFunctionReturn(PETSC_SUCCESS); 1814 } 1815 1816 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1817 { 1818 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1819 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1820 1821 PetscFunctionBegin; 1822 mumps->id.ICNTL(9) = 0; 1823 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1824 mumps->id.ICNTL(9) = value; 1825 PetscFunctionReturn(PETSC_SUCCESS); 1826 } 1827 1828 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1829 { 1830 PetscBool flg; 1831 Mat B; 1832 1833 PetscFunctionBegin; 1834 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1835 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1836 1837 /* Create B=Bt^T that uses Bt's data structure */ 1838 PetscCall(MatCreateTranspose(Bt, &B)); 1839 1840 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1841 PetscCall(MatDestroy(&B)); 1842 PetscFunctionReturn(PETSC_SUCCESS); 1843 } 1844 1845 #if !defined(PETSC_USE_COMPLEX) 1846 /* 1847 input: 1848 F: numeric factor 1849 output: 1850 nneg: total number of negative pivots 1851 nzero: total number of zero pivots 1852 npos: (global dimension of F) - nneg - nzero 1853 */ 1854 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1855 { 1856 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1857 PetscMPIInt size; 1858 1859 PetscFunctionBegin; 1860 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1861 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1862 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1863 1864 if (nneg) *nneg = mumps->id.INFOG(12); 1865 if (nzero || npos) { 1866 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1867 if (nzero) *nzero = mumps->id.INFOG(28); 1868 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1869 } 1870 PetscFunctionReturn(PETSC_SUCCESS); 1871 } 1872 #endif 1873 1874 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1875 { 1876 PetscMPIInt nreqs; 1877 PetscMUMPSInt *irn, *jcn; 1878 PetscMPIInt count; 1879 PetscCount totnnz, remain; 1880 const PetscInt osize = mumps->omp_comm_size; 1881 PetscScalar *val; 1882 1883 PetscFunctionBegin; 1884 if (osize > 1) { 1885 if (reuse == MAT_INITIAL_MATRIX) { 1886 /* master first gathers counts of nonzeros to receive */ 1887 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1888 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1889 1890 /* Then each computes number of send/recvs */ 1891 if (mumps->is_omp_master) { 1892 /* Start from 1 since self communication is not done in MPI */ 1893 nreqs = 0; 1894 for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1895 } else { 1896 nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX)); 1897 } 1898 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1899 1900 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1901 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1902 might be a prime number > 2^31-1, we have to slice the message. 
Note omp_comm_size 1903 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1904 */ 1905 nreqs = 0; /* counter for actual send/recvs */ 1906 if (mumps->is_omp_master) { 1907 totnnz = 0; 1908 1909 for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1910 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1911 PetscCall(PetscMalloc1(totnnz, &val)); 1912 1913 /* Self communication */ 1914 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1915 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1916 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1917 1918 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1919 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1920 PetscCall(PetscFree(mumps->val_alloc)); 1921 mumps->nnz = totnnz; 1922 mumps->irn = irn; 1923 mumps->jcn = jcn; 1924 mumps->val = mumps->val_alloc = val; 1925 1926 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1927 jcn += mumps->recvcount[0]; 1928 val += mumps->recvcount[0]; 1929 1930 /* Remote communication */ 1931 for (PetscMPIInt i = 1; i < osize; i++) { 1932 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 1933 remain = mumps->recvcount[i] - count; 1934 while (count > 0) { 1935 PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1936 PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1937 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1938 irn += count; 1939 jcn += count; 1940 val += count; 1941 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1942 remain -= count; 1943 } 1944 } 1945 } else { 1946 irn = mumps->irn; 1947 jcn = mumps->jcn; 1948 val = mumps->val; 1949 count = (PetscMPIInt)PetscMin(mumps->nnz, 
(PetscMPIInt)PETSC_MPI_INT_MAX); 1950 remain = mumps->nnz - count; 1951 while (count > 0) { 1952 PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1953 PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1954 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1955 irn += count; 1956 jcn += count; 1957 val += count; 1958 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1959 remain -= count; 1960 } 1961 } 1962 } else { 1963 nreqs = 0; 1964 if (mumps->is_omp_master) { 1965 val = mumps->val + mumps->recvcount[0]; 1966 for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1967 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 1968 remain = mumps->recvcount[i] - count; 1969 while (count > 0) { 1970 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1971 val += count; 1972 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1973 remain -= count; 1974 } 1975 } 1976 } else { 1977 val = mumps->val; 1978 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 1979 remain = mumps->nnz - count; 1980 while (count > 0) { 1981 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1982 val += count; 1983 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1984 remain -= count; 1985 } 1986 } 1987 } 1988 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1989 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1990 } 1991 PetscFunctionReturn(PETSC_SUCCESS); 1992 } 1993 1994 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info) 1995 { 1996 Mat_MUMPS *mumps = 
(Mat_MUMPS *)F->data; 1997 PetscBool isMPIAIJ; 1998 1999 PetscFunctionBegin; 2000 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 2001 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2002 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2003 PetscFunctionReturn(PETSC_SUCCESS); 2004 } 2005 2006 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 2007 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2008 2009 /* numerical factorization phase */ 2010 mumps->id.job = JOB_FACTNUMERIC; 2011 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2012 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 2013 } else { 2014 mumps->id.a_loc = (MumpsScalar *)mumps->val; 2015 } 2016 PetscMUMPS_c(mumps); 2017 if (mumps->id.INFOG(1) < 0) { 2018 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2019 if (mumps->id.INFOG(1) == -10) { 2020 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2021 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2022 } else if (mumps->id.INFOG(1) == -13) { 2023 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2024 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2025 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2026 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem 
with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2027 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2028 } else { 2029 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2030 F->factorerrortype = MAT_FACTOR_OTHER; 2031 } 2032 } 2033 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 2034 2035 F->assembled = PETSC_TRUE; 2036 2037 if (F->schur) { /* reset Schur status to unfactored */ 2038 #if defined(PETSC_HAVE_CUDA) 2039 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 2040 #endif 2041 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2042 mumps->id.ICNTL(19) = 2; 2043 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2044 } 2045 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2046 } 2047 2048 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2049 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2050 2051 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2052 if (mumps->petsc_size > 1) { 2053 PetscInt lsol_loc; 2054 PetscScalar *sol_loc; 2055 2056 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 2057 2058 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2059 if (mumps->x_seq) { 2060 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2061 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2062 PetscCall(VecDestroy(&mumps->x_seq)); 2063 } 2064 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2065 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2066 mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc; 2067 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2068 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, 
&mumps->x_seq)); 2069 } 2070 PetscCall(PetscLogFlops((double)mumps->id.RINFO(2))); 2071 PetscFunctionReturn(PETSC_SUCCESS); 2072 } 2073 2074 /* Sets MUMPS options from the options database */ 2075 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2076 { 2077 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2078 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2079 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2080 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2081 MumpsScalar *arr; 2082 2083 PetscFunctionBegin; 2084 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2085 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2086 PetscInt nthreads = 0; 2087 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2088 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2089 2090 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2091 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2092 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2093 2094 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2095 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2096 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2097 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2098 if (mumps->use_petsc_omp_support) { 2099 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 2100 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2101 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2102 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2103 #else 2104 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2105 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2106 #endif 2107 } else { 2108 mumps->omp_comm = PETSC_COMM_SELF; 2109 mumps->mumps_comm = mumps->petsc_comm; 2110 mumps->is_omp_master = PETSC_TRUE; 2111 } 2112 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2113 mumps->reqs = NULL; 2114 mumps->tag = 0; 2115 2116 if (mumps->mumps_comm != MPI_COMM_NULL) { 2117 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2118 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 2119 MPI_Comm comm; 2120 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2121 mumps->mumps_comm = comm; 2122 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2123 } 2124 2125 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2126 mumps->id.job = JOB_INIT; 2127 mumps->id.par = 1; /* host participates factorizaton and solve */ 2128 mumps->id.sym = mumps->sym; 2129 2130 size = mumps->id.size_schur; 2131 arr = mumps->id.schur; 2132 listvar_schur = mumps->id.listvar_schur; 2133 PetscMUMPS_c(mumps); 2134 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2135 2136 /* set PETSc-MUMPS default options - override MUMPS default */ 2137 mumps->id.ICNTL(3) = 0; 2138 mumps->id.ICNTL(4) = 0; 2139 if (mumps->petsc_size == 1) { 2140 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2141 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2142 } else { 2143 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2144 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2145 } 2146 2147 /* restore cached ICNTL and CNTL values */ 2148 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2149 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2150 PetscCall(PetscFree(mumps->ICNTL_pre)); 2151 PetscCall(PetscFree(mumps->CNTL_pre)); 2152 2153 if (schur) { 2154 mumps->id.size_schur = size; 2155 mumps->id.schur_lld = size; 2156 mumps->id.schur = arr; 2157 mumps->id.listvar_schur = listvar_schur; 2158 if (mumps->petsc_size > 1) { 2159 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2160 2161 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2162 gs = mumps->myid ? (mumps->id.size_schur ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2163 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2164 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2165 } else { 2166 if (F->factortype == MAT_FACTOR_LU) { 2167 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2168 } else { 2169 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2170 } 2171 } 2172 mumps->id.ICNTL(26) = -1; 2173 } 2174 2175 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2176 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 2177 */ 2178 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2179 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2180 2181 mumps->scat_rhs = NULL; 2182 mumps->scat_sol = NULL; 2183 } 2184 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2185 if (flg) mumps->id.ICNTL(1) = icntl; 2186 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2187 if (flg) mumps->id.ICNTL(2) = icntl; 2188 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2189 if (flg) mumps->id.ICNTL(3) = icntl; 2190 2191 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2192 if (flg) mumps->id.ICNTL(4) = icntl; 2193 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 
*/ 2194 2195 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2196 if (flg) mumps->id.ICNTL(6) = icntl; 2197 2198 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2199 if (flg) { 2200 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2201 mumps->id.ICNTL(7) = icntl; 2202 } 2203 2204 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2205 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2206 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2207 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2208 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2209 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2210 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2211 
PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2212 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs; 2213 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2214 if (flg) { 2215 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2216 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2217 } 2218 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2219 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2220 PetscCall(MatDestroy(&F->schur)); 2221 PetscCall(MatMumpsResetSchur_Private(mumps)); 2222 } 2223 2224 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2225 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2226 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2227 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2228 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 
2229 In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 2230 */ 2231 mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */ 2232 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI) 2233 mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */ 2234 #endif 2235 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2236 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2237 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2238 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2239 #endif 2240 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2241 2242 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2243 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2244 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2245 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2246 2247 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", 
"ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2248 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2249 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2250 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2251 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2252 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2253 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2254 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2255 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2256 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2257 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of 
BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2258 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL)); 2259 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2260 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL)); 2261 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2262 2263 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2264 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2265 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2266 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2267 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2268 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2269 2270 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2271 2272 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 
2273 if (ninfo) { 2274 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2275 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2276 mumps->ninfo = ninfo; 2277 for (i = 0; i < ninfo; i++) { 2278 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2279 mumps->info[i] = info[i]; 2280 } 2281 } 2282 PetscOptionsEnd(); 2283 PetscFunctionReturn(PETSC_SUCCESS); 2284 } 2285 2286 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2287 { 2288 PetscFunctionBegin; 2289 if (mumps->id.INFOG(1) < 0) { 2290 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2291 if (mumps->id.INFOG(1) == -6) { 2292 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2293 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2294 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2295 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2296 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2297 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2298 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2299 } else { 2300 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2301 F->factorerrortype = MAT_FACTOR_OTHER; 2302 } 2303 } 2304 if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 2305 PetscFunctionReturn(PETSC_SUCCESS); 2306 } 2307 2308 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2309 { 2310 Mat_MUMPS *mumps = 
(Mat_MUMPS *)F->data;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  /* Convert A to the triples (irn/jcn/val) MUMPS consumes, possibly gathering them on rank 0 */
  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
      /* a user-supplied row permutation r overrides the automatic ordering choice (ICNTL(7)=7) */
      if (r && mumps->id.ICNTL(7) == 7) {
        mumps->id.ICNTL(7) = 1;
        if (!mumps->myid) {
          const PetscInt *idx;
          PetscInt        i;

          PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
          PetscCall(ISGetIndices(r, &idx));
          for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
          PetscCall(ISRestoreIndices(r, &idx));
        }
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps);
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
  F->ops->solve             = MatSolve_MUMPS;
  F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
  F->ops->matsolve          = MatMatSolve_MUMPS;
  F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
  F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Note the PETSc r and c permutations are ignored */
static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps);
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
  F->ops->solve             = MatSolve_MUMPS;
  F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
  F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Note the PETSc r permutation and factor info are ignored */
static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn =
mumps->jcn;
      if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps);
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  /* Cholesky reuses MatSolve_MUMPS for the transpose solve since the factorization is symmetric */
  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS;
  F->ops->matsolve              = MatMatSolve_MUMPS;
  F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
  F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = NULL;
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Print the MUMPS control parameters (ICNTL/CNTL) and, in detail mode, the per-process and
   global statistics (RINFO/INFO/RINFOG/INFOG) to an ASCII viewer */
static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym));
        PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11)));
        /* the error-analysis statistics below are only meaningful when ICNTL(11) was enabled */
        if (mumps->id.ICNTL(11) > 0) {
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)mumps->id.RINFOG(4)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)mumps->id.RINFOG(5)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)mumps->id.RINFOG(6)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)mumps->id.RINFOG(7), (double)mumps->id.RINFOG(8)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", (double)mumps->id.RINFOG(9)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)mumps->id.RINFOG(10), (double)mumps->id.RINFOG(11)));
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15)));
        /* ICNTL(15-17) not used */
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)mumps->id.CNTL(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)mumps->id.CNTL(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)mumps->id.CNTL(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)mumps->id.CNTL(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)mumps->id.CNTL(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)mumps->id.CNTL(7)));

        /* information local to each processor: print synchronized so ranks appear in order */
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n"));
        PetscCall(PetscViewerASCIIPushSynchronized(viewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(1)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(2)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(3)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
        PetscCall(PetscViewerFlush(viewer));

        /* user-requested INFO entries from -mat_mumps_view_info (indices stored in mumps->info[]) */
        if (mumps->ninfo && mumps->ninfo <= 80) {
          PetscInt i;
          for (i = 0; i < mumps->ninfo; i++) {
            PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
            PetscCall(PetscViewerFlush(viewer));
          }
        }
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));

      if (mumps->myid == 0) { /* information from the host */
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)mumps->id.RINFOG(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)mumps->id.RINFOG(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)mumps->id.RINFOG(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)mumps->id.RINFOG(12), (double)mumps->id.RINFOG(13), mumps->id.INFOG(34)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report factor statistics; a negative INFOG(20) means the count is expressed in millions of entries
   (MUMPS convention), hence the -1000000 rescaling below */
static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  info->block_size   = 1.0;
  info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_used      = mumps->id.INFOG(20) >= 0 ?
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  /* the remaining MatInfo fields are not tracked by this MUMPS interface */
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Register the index set `is` as the Schur complement block of the factored matrix F:
   allocates the dense SEQDENSE matrix F->schur that MUMPS fills during factorization and
   hands MUMPS the (1-based) list of variables belonging to the Schur block. */
static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  mumps->id.schur = (MumpsScalar *)arr; /* MUMPS writes the Schur complement directly into the dense storage of F->schur */
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld)); /* leading dimension equals the Schur size (square, contiguous storage) */
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); /* an SPD factorization yields an SPD Schur complement */

  /* MUMPS expects Fortran style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the Schur complement computed by MUMPS into a newly created SEQDENSE matrix *S.
   Depending on the symmetry of the factorization and on ICNTL(19), MUMPS returns the
   Schur block stored by rows, by columns, or only as a (non-packed) lower triangle;
   each variant is expanded here into PETSc's full column-major dense storage. */
static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always returns a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[j * N + i] = val; /* transpose the row-major MUMPS data into column-major storage */
        }
      }
    } else { /* stored by columns */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    }
  } else {                          /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = array[j * N + i] = val; /* mirror the triangle: the Schur block is symmetric */
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = array[j * N + i] = val; /* mirror the triangle: the Schur block is symmetric */
        }
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set ICNTL(icntl) = ival. While the MUMPS instance has not been created yet
   (id.job == JOB_NULL), the pair is cached in ICNTL_pre with layout
   ICNTL_pre[0] = number of cached pairs, followed by (icntl, ival) pairs;
   presumably the cache is replayed once PetscMUMPS_c() is first invoked. */
static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) {                             /* not already cached */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Get ICNTL(icntl): reads the cached value when MUMPS has not been initialized yet,
   otherwise queries the live MUMPS instance. */
static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ?
mumps->ICNTL_pre[0] : 0;
    *ival = 0; /* default when the requested ICNTL was never cached */
    for (i = 0; i < nICNTL_pre; ++i) {
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
    }
  } else *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
. icntl - index of MUMPS parameter array `ICNTL()`
- ival  - value of MUMPS `ICNTL(icntl)`

  Options Database Key:
. -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`

  Level: beginner

  Note:
  Ignored if MUMPS is not installed or `F` is not a MUMPS matrix

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveInt(F, ival, 3);
  /* valid indices are 1..38 plus the extensions 48 and 58 */
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array ICNTL()

  Output Parameter:
. ival - value of MUMPS ICNTL(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(ival, 3);
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set CNTL(icntl) = val; real-valued counterpart of MatMumpsSetIcntl_MUMPS(). Pending
   values are cached in CNTL_pre (CNTL_pre[0] = count, then (icntl, val) pairs, all
   stored as PetscReal) until the MUMPS instance exists. */
static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    for (i = 0; i < nCNTL_pre; ++i)
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; /* is this CNTL already cached? */
    if (i == nCNTL_pre) {                             /* not already cached: grow the cache by one (icntl, val) pair */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
      mumps->CNTL_pre[0]++;
    }
    mumps->CNTL_pre[1 + 2 * i] = icntl;
    mumps->CNTL_pre[2 + 2 * i] = val;
  } else mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Get CNTL(icntl): reads the cached value when MUMPS has not been initialized yet,
   otherwise queries the live MUMPS instance. */
static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    *val = 0.0; /* default when the requested CNTL was never cached */
    for (i = 0; i < nCNTL_pre; ++i) {
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
    }
  } else *val = mumps->id.CNTL(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
. icntl - index of MUMPS parameter array `CNTL()`
- val   - value of MUMPS `CNTL(icntl)`

  Options Database Key:
.
-mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival

  Level: beginner

  Note:
  Ignored if MUMPS is not installed or `F` is not a MUMPS matrix

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveReal(F, val, 3);
  /* MUMPS only defines CNTL(1) through CNTL(7) */
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array CNTL()

  Output Parameter:
. val - value of MUMPS CNTL(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return INFO(icntl), the rank-local MUMPS integer information array entry */
static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return INFOG(icntl), the global (host-collected) MUMPS integer information array entry */
static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return RINFO(icntl), the rank-local MUMPS real information array entry */
static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfo = mumps->id.RINFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return RINFOG(icntl), the global (host-collected) MUMPS real information array entry */
static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfog = mumps->id.RINFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the 0-based indices of rows on which MUMPS detected a null pivot. Requires
   ICNTL(24) = 1 (null pivot row detection). MUMPS keeps the list (pivnul_list, size
   INFOG(28)) on the host, so only rank 0 returns a non-empty array; the caller owns
   the returned array and must free it. */
static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
  *size  = 0;
  *array = NULL;
  if (!mumps->myid) {
    *size = mumps->id.INFOG(28);
    PetscCall(PetscMalloc1(*size, array));
    for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; /* MUMPS indices are 1-based; convert to 0-based */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute the entries of inv(A) whose sparsity pattern is given by spRHS (a
   MATTRANSPOSEVIRTUAL wrapper around a sequential AIJ matrix) using MUMPS'
   selected-entries-of-the-inverse feature (ICNTL(30)). Only rank 0 feeds the
   sparse right-hand side; the solve is collective. */
static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
{
  Mat          Bt = NULL, Btseq = NULL;
  PetscBool    flg;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *aa;
  PetscInt     spnr, *ia, *ja, M, nrhs;

  PetscFunctionBegin;
  PetscAssertPointer(spRHS, 2);
  PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
  if (flg) {
    PetscCall(MatTransposeGetMat(spRHS, &Bt));
  } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");

  PetscCall(MatMumpsSetIcntl(F, 30, 1)); /* ICNTL(30) = 1: compute selected entries of the inverse */

  if (mumps->petsc_size > 1) {
    Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
    Btseq = b->A; /* use the diagonal (local) block of the MPIAIJ matrix */
  } else {
    Btseq = Bt;
  }

  PetscCall(MatGetSize(spRHS, &M, &nrhs));
  mumps->id.nrhs = (PetscMUMPSInt)nrhs;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
  mumps->id.rhs = NULL;

  if (!mumps->myid) {
    /* hand MUMPS the CSR structure of the sparse right-hand side (1-based indices) */
    PetscCall(MatSeqAIJGetArray(Btseq, &aa));
    PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
    PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
    mumps->id.rhs_sparse = (MumpsScalar *)aa;
  } else {
    /* non-root ranks provide no sparse RHS data */
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    mumps->id.rhs_sparse  = NULL;
  }
  mumps->id.ICNTL(20) = 1; /* rhs is sparse */
  mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  if (!mumps->myid) {
    PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
    PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameter:
. F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

  Output Parameter:
. spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
@*/
PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute selected entries of inv(A^T) by wrapping the AIJ matrix spRHST in a
   temporary MATTRANSPOSEVIRTUAL shell and reusing MatMumpsGetInverse_MUMPS(). */
static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
{
  Mat spRHS;

  PetscFunctionBegin;
  PetscCall(MatCreateTranspose(spRHST, &spRHS));
  PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
  PetscCall(MatDestroy(&spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameter:
. F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

  Output Parameter:
. spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
@*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
{
  PetscBool flg;

  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");

  PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array INFO()

  Output Parameter:
.
ival - value of MUMPS INFO(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array INFOG()

  Output Parameter:
. ival - value of MUMPS INFOG(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array RINFO()

  Output Parameter:
. val - value of MUMPS RINFO(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array RINFOG()

  Output Parameter:
. val - value of MUMPS RINFOG(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameter:
. F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

  Output Parameters:
+ size  - local size of the array. The size of the array is non-zero only on MPI rank 0
- array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
          for freeing this array.

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
@*/
PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(size, 2);
  PetscAssertPointer(array, 3);
  PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
  MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>

  Works with `MATAIJ` and `MATSBAIJ` matrices

  Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

  Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
  See details below.

  Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver

  Options Database Keys:
+ -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
. -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
. -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
. -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
. -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
.
-mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3272 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3273 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3274 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3275 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3276 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3277 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3278 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3279 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3280 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3281 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3282 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3283 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3284 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3285 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3286 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3287 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 3288 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3289 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3290 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3291 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3292 . 
-mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3293 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3294 . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB) 3295 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3296 . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism 3297 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3298 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3299 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3300 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3301 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3302 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3303 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3304 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3305 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3306 3307 Level: beginner 3308 3309 Notes: 3310 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will 3311 error if the matrix is Hermitian. 3312 3313 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3314 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 
   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
   the failure with
.vb
          KSPGetPC(ksp,&pc);
          PCFactorGetMatrix(pc,&mat);
          MatMumpsGetInfo(mat,....);
          MatMumpsGetInfog(mat,....); etc.
.ve
   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.

   MUMPS provides 64-bit integer support in two build modes:
   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

   Two modes to run MUMPS/PETSc with OpenMP
.vb
   Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
   threads per rank, then you may use "export `OMP_NUM_THREADS`=16 && mpirun -n 4 ./test".
.ve

.vb
   `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores.
   For example,
   if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
.ve

   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
   (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
   (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
   libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).

   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
   processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
   by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
   In a multi-socket compute node, MPI rank mapping is an issue.
Still use the above example and suppose your compute node has two sockets, 3360 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3361 MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3362 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3363 problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding. 3364 For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and 3365 examine the mapping result. 3366 3367 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3368 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3369 calls `omp_set_num_threads`(m) internally before calling MUMPS. 
3370 3371 See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 3372 3373 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3374 M*/ 3375 3376 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 3377 { 3378 PetscFunctionBegin; 3379 *type = MATSOLVERMUMPS; 3380 PetscFunctionReturn(PETSC_SUCCESS); 3381 } 3382 3383 /* MatGetFactor for Seq and MPI AIJ matrices */ 3384 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3385 { 3386 Mat B; 3387 Mat_MUMPS *mumps; 3388 PetscBool isSeqAIJ, isDiag, isDense; 3389 PetscMPIInt size; 3390 3391 PetscFunctionBegin; 3392 #if defined(PETSC_USE_COMPLEX) 3393 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3394 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3395 *F = NULL; 3396 PetscFunctionReturn(PETSC_SUCCESS); 3397 } 3398 #endif 3399 /* Create the factorization matrix */ 3400 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3401 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3402 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3403 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3404 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3405 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3406 PetscCall(MatSetUp(B)); 3407 3408 PetscCall(PetscNew(&mumps)); 3409 3410 B->ops->view = MatView_MUMPS; 3411 B->ops->getinfo = MatGetInfo_MUMPS; 3412 3413 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3414 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3415 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3416 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3417 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3418 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3419 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3420 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3421 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3422 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3423 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", 
MatMumpsGetRinfog_MUMPS)); 3424 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3425 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3426 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3427 3428 if (ftype == MAT_FACTOR_LU) { 3429 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3430 B->factortype = MAT_FACTOR_LU; 3431 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3432 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3433 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3434 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3435 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3436 mumps->sym = 0; 3437 } else { 3438 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3439 B->factortype = MAT_FACTOR_CHOLESKY; 3440 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3441 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3442 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3443 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3444 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3445 #if defined(PETSC_USE_COMPLEX) 3446 mumps->sym = 2; 3447 #else 3448 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3449 else mumps->sym = 2; 3450 #endif 3451 } 3452 3453 /* set solvertype */ 3454 PetscCall(PetscFree(B->solvertype)); 3455 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3456 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3457 if (size == 1) { 3458 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed 
into symbolic factorization */ 3459 B->canuseordering = PETSC_TRUE; 3460 } 3461 B->ops->destroy = MatDestroy_MUMPS; 3462 B->data = (void *)mumps; 3463 3464 *F = B; 3465 mumps->id.job = JOB_NULL; 3466 mumps->ICNTL_pre = NULL; 3467 mumps->CNTL_pre = NULL; 3468 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3469 PetscFunctionReturn(PETSC_SUCCESS); 3470 } 3471 3472 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3473 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 3474 { 3475 Mat B; 3476 Mat_MUMPS *mumps; 3477 PetscBool isSeqSBAIJ; 3478 PetscMPIInt size; 3479 3480 PetscFunctionBegin; 3481 #if defined(PETSC_USE_COMPLEX) 3482 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3483 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3484 *F = NULL; 3485 PetscFunctionReturn(PETSC_SUCCESS); 3486 } 3487 #endif 3488 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3489 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3490 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3491 PetscCall(MatSetUp(B)); 3492 3493 PetscCall(PetscNew(&mumps)); 3494 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3495 if (isSeqSBAIJ) { 3496 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3497 } else { 3498 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3499 } 3500 3501 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3502 B->ops->view = MatView_MUMPS; 3503 B->ops->getinfo = MatGetInfo_MUMPS; 3504 3505 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3506 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3507 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3508 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3509 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3510 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3511 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3512 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3513 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3514 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3515 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3516 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3517 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3518 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3519 3520 B->factortype = MAT_FACTOR_CHOLESKY; 3521 #if defined(PETSC_USE_COMPLEX) 3522 mumps->sym = 2; 3523 #else 3524 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3525 else mumps->sym = 2; 3526 #endif 3527 3528 /* set solvertype */ 3529 PetscCall(PetscFree(B->solvertype)); 3530 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3531 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3532 if (size == 1) { 3533 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3534 B->canuseordering = PETSC_TRUE; 3535 } 3536 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3537 
B->ops->destroy = MatDestroy_MUMPS; 3538 B->data = (void *)mumps; 3539 3540 *F = B; 3541 mumps->id.job = JOB_NULL; 3542 mumps->ICNTL_pre = NULL; 3543 mumps->CNTL_pre = NULL; 3544 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3545 PetscFunctionReturn(PETSC_SUCCESS); 3546 } 3547 3548 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3549 { 3550 Mat B; 3551 Mat_MUMPS *mumps; 3552 PetscBool isSeqBAIJ; 3553 PetscMPIInt size; 3554 3555 PetscFunctionBegin; 3556 /* Create the factorization matrix */ 3557 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3558 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3559 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3560 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3561 PetscCall(MatSetUp(B)); 3562 3563 PetscCall(PetscNew(&mumps)); 3564 if (ftype == MAT_FACTOR_LU) { 3565 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3566 B->factortype = MAT_FACTOR_LU; 3567 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3568 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3569 mumps->sym = 0; 3570 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3571 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3572 3573 B->ops->view = MatView_MUMPS; 3574 B->ops->getinfo = MatGetInfo_MUMPS; 3575 3576 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3577 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3578 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3579 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", 
MatMumpsSetIcntl_MUMPS)); 3580 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3581 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3582 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3583 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3584 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3585 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3586 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3587 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3588 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3589 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3590 3591 /* set solvertype */ 3592 PetscCall(PetscFree(B->solvertype)); 3593 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3594 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3595 if (size == 1) { 3596 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3597 B->canuseordering = PETSC_TRUE; 3598 } 3599 B->ops->destroy = MatDestroy_MUMPS; 3600 B->data = (void *)mumps; 3601 3602 *F = B; 3603 mumps->id.job = JOB_NULL; 3604 mumps->ICNTL_pre = NULL; 3605 mumps->CNTL_pre = NULL; 3606 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3607 PetscFunctionReturn(PETSC_SUCCESS); 3608 } 3609 3610 /* MatGetFactor for Seq and MPI SELL matrices */ 3611 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3612 { 3613 Mat B; 3614 Mat_MUMPS *mumps; 3615 PetscBool 
isSeqSELL; 3616 PetscMPIInt size; 3617 3618 PetscFunctionBegin; 3619 /* Create the factorization matrix */ 3620 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3621 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3622 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3623 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3624 PetscCall(MatSetUp(B)); 3625 3626 PetscCall(PetscNew(&mumps)); 3627 3628 B->ops->view = MatView_MUMPS; 3629 B->ops->getinfo = MatGetInfo_MUMPS; 3630 3631 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3632 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3633 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3634 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3635 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3636 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3637 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3638 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3639 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3640 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3641 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3642 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3643 3644 if (ftype == MAT_FACTOR_LU) { 3645 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3646 
B->factortype = MAT_FACTOR_LU; 3647 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3648 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3649 mumps->sym = 0; 3650 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3651 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3652 3653 /* set solvertype */ 3654 PetscCall(PetscFree(B->solvertype)); 3655 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3656 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3657 if (size == 1) { 3658 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3659 B->canuseordering = PETSC_TRUE; 3660 } 3661 B->ops->destroy = MatDestroy_MUMPS; 3662 B->data = (void *)mumps; 3663 3664 *F = B; 3665 mumps->id.job = JOB_NULL; 3666 mumps->ICNTL_pre = NULL; 3667 mumps->CNTL_pre = NULL; 3668 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3669 PetscFunctionReturn(PETSC_SUCCESS); 3670 } 3671 3672 /* MatGetFactor for MATNEST matrices */ 3673 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3674 { 3675 Mat B, **mats; 3676 Mat_MUMPS *mumps; 3677 PetscInt nr, nc; 3678 PetscMPIInt size; 3679 PetscBool flg = PETSC_TRUE; 3680 3681 PetscFunctionBegin; 3682 #if defined(PETSC_USE_COMPLEX) 3683 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3684 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3685 *F = NULL; 3686 PetscFunctionReturn(PETSC_SUCCESS); 3687 } 3688 #endif 3689 3690 /* Return if some condition is not satisfied */ 3691 *F = NULL; 3692 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3693 if (ftype == MAT_FACTOR_CHOLESKY) { 3694 IS *rows, *cols; 3695 PetscInt *m, *M; 3696 3697 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3698 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3699 PetscCall(MatNestGetISs(A, rows, cols)); 3700 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3701 if (!flg) { 3702 PetscCall(PetscFree2(rows, cols)); 3703 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 3704 PetscFunctionReturn(PETSC_SUCCESS); 3705 } 3706 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3707 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3708 for (PetscInt r = 0; flg && r < nr; r++) 3709 for (PetscInt k = r + 1; flg && k < nr; k++) 3710 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3711 PetscCall(PetscFree2(m, M)); 3712 PetscCall(PetscFree2(rows, cols)); 3713 if (!flg) { 3714 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. 
Use MAT_FACTOR_LU.\n")); 3715 PetscFunctionReturn(PETSC_SUCCESS); 3716 } 3717 } 3718 3719 for (PetscInt r = 0; r < nr; r++) { 3720 for (PetscInt c = 0; c < nc; c++) { 3721 Mat sub = mats[r][c]; 3722 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag, isDense; 3723 3724 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3725 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 3726 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 3727 else { 3728 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 3729 if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 3730 } 3731 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3732 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3733 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3734 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3735 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3736 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3737 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 3738 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3739 if (ftype == MAT_FACTOR_CHOLESKY) { 3740 if (r == c) { 3741 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) { 3742 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3743 flg = PETSC_FALSE; 3744 } 3745 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 3746 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3747 flg = 
PETSC_FALSE; 3748 } 3749 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 3750 PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3751 flg = PETSC_FALSE; 3752 } 3753 } 3754 } 3755 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3756 3757 /* Create the factorization matrix */ 3758 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3759 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3760 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3761 PetscCall(MatSetUp(B)); 3762 3763 PetscCall(PetscNew(&mumps)); 3764 3765 B->ops->view = MatView_MUMPS; 3766 B->ops->getinfo = MatGetInfo_MUMPS; 3767 3768 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3769 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3770 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3771 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3772 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3773 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3774 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3775 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3776 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3777 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3778 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3779 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3780 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3781 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3782 3783 if (ftype == MAT_FACTOR_LU) { 3784 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3785 B->factortype = MAT_FACTOR_LU; 3786 mumps->sym = 0; 3787 } else { 3788 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3789 B->factortype = MAT_FACTOR_CHOLESKY; 3790 #if defined(PETSC_USE_COMPLEX) 3791 mumps->sym = 2; 3792 #else 3793 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3794 else mumps->sym = 2; 3795 #endif 3796 } 3797 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3798 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3799 3800 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3801 if (size == 1) { 3802 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3803 B->canuseordering = PETSC_TRUE; 3804 } 3805 3806 /* set solvertype */ 3807 PetscCall(PetscFree(B->solvertype)); 3808 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3809 B->ops->destroy = MatDestroy_MUMPS; 3810 B->data = (void *)mumps; 3811 3812 *F = B; 3813 mumps->id.job = JOB_NULL; 3814 mumps->ICNTL_pre = NULL; 3815 mumps->CNTL_pre = NULL; 3816 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3817 PetscFunctionReturn(PETSC_SUCCESS); 3818 } 3819 3820 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3821 { 3822 PetscFunctionBegin; 3823 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3824 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3825 
PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3826 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3827 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3828 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3829 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3830 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3831 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3832 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3833 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3834 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3835 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3836 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3837 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3838 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3839 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3840 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3841 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 3842 PetscFunctionReturn(PETSC_SUCCESS); 3843 } 3844