/*
   Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

#define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <cmumps_c.h>
  #else
    #include <zmumps_c.h>
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <smumps_c.h>
  #else
    #include <dmumps_c.h>
  #endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c cmumps_c
  #else
    #define MUMPS_c zmumps_c
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c smumps_c
  #else
    #define MUMPS_c dmumps_c
  #endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used when building MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)a;
  PetscFunctionReturn(PETSC_SUCCESS);
}
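
/* Typical use (illustrative): narrow a PetscInt/PetscCount row or column index right before handing it to
   MUMPS, e.g. PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); with 64-bit PetscInt and a debugging build,
   the PetscAssert() above catches values that do not fit into a 32-bit MUMPS integer */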

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;

  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
           to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
           an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
           omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
        */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MumpsScalar mumps_complex
  #else
    #define MumpsScalar mumps_double_complex
  #endif
#else
  #define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I) - 1]
#define CNTL(I)   cntl[(I) - 1]
#define INFOG(I)  infog[(I) - 1]
#define INFO(I)   info[(I) - 1]
#define RINFOG(I) rinfog[(I) - 1]
#define RINFO(I)  rinfo[(I) - 1]

typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
  #else
  ZMUMPS_STRUC_C id;
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
  #else
  DMUMPS_STRUC_C id;
  #endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscCount  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
  PetscCount  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */
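  /* Example of the 1-based convention (illustrative only): a dense 2x2 block stored as CSR has
     ia = {1, 3, 5} and ja = {1, 2, 1, 2}, hence nnz = ia[nrow] - 1 = 4 */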

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

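    /* X holds the solution of the reduced (Schur) system; copy it back into B, which wraps id.redrhs,
       so the subsequent MUMPS expansion phase reads the updated reduced right-hand side from there */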
    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/
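
/*
   Illustrative example (not part of the interface): with shift = 1 (Fortran-style indexing), the 2x2
   SeqAIJ matrix [1 2; 0 3] is converted to the triples
     r = {1, 1, 2},  c = {1, 2, 2},  v = {1.0, 2.0, 3.0},  nnz = 3,
   which is the assembled COO layout MUMPS expects in irn/jcn (or irn_loc/jcn_loc in the distributed case).
*/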

static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscCount         nz, rnz, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (PetscCount i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (PetscCount j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscCount     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices];
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
  else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatGetBlockSize(A, &bs));
    M  = A->rmap->N / bs;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = aa->a;
  } else if (mumps->nest_vals)
    PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscCount      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool isset, hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscCount alloc_size = aa->nz * bs2;

    PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
    if (bs > 1) {
      PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs > 1) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (ajj[j] > i || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
              }
              val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
  } else if (mumps->nest_vals)
    PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
  PetscCount         nz, rnz, i, j;
  const PetscScalar *av, *v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
  PetscBool          missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
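  /* Only one triangle of a symmetric matrix needs to be passed to MUMPS; this routine extracts the upper
     triangular part (diagonal included) of the AIJ matrix, and rows reported by MatMissingDiagonal_SeqAIJ()
     as having no stored diagonal are scanned entry by entry instead of starting from aa->diag */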
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i + 1] - adiag[i];
        }
      }
    } else {
      for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) val[nz++] = v1[j];
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) val[nz++] = v1[j];
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
  PetscInt           bs;
  PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
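  /* rstart is the global index of this rank's first row; garray[] translates local column indices of the
     off-diagonal block B back to global column indices (standard PETSc MPI matrix layout) */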
  PetscCall(MatGetBlockSize(A, &bs));
  rstart = A->rmap->rstart;
  ai     = aa->i;
  aj     = aa->j;
  bi     = bb->i;
  bj     = bb->j;
  av     = aa->a;
  bv     = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    /* can not decide the exact mumps->nnz now because of the SBAIJ */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i] * bs2;
    v2     = bv + bi[i] * bs2;

    if (bs > 1) {
      /* A-part */
      for (j = 0; j < countA; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (rstart + ajj[j] * bs > irow || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs];
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
            }
            val[jj++] = v2[j * bs2 + m + k * bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j = 0; j < countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)Ad->data;
  bb = (Mat_SeqAIJ *)Ao->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;
  cstart = A->cmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa    = (Mat_SeqAIJ *)Ad->data;
  bb    = (Mat_SeqAIJ *)Ao->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i = 0; i < m; i++) {
      nza += (ai[i + 1] - adiag[i]);
      countB = bi[i + 1] - bi[i];
      bjj    = bj + bi[i];
      for (j = 0; j < countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of upper triangular part of mat */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i + 1] - adiag[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt     M = A->rmap->n;
  PetscCount         i;
  PetscMUMPSInt     *row, *col;
  Vec                v;

  PetscFunctionBegin;
  PetscCall(MatDiagonalGetDiagonal(A, &v));
  PetscCall(VecGetArrayRead(v, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(M, &row, M, &col));
    for (i = 0; i < M; i++) {
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
      col[i] = row[i];
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = M;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(VecRestoreArrayRead(v, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
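
/* Dense blocks are fed to MUMPS as a fully populated triple list: every one of the m*N local entries gets a
   (row, col, val) entry, traversed column by column to match the MatDense (column-major) storage. When the
   leading dimension differs from m, or the values live inside a MATNEST buffer, the entries are compacted
   into a separate value array instead of being aliased. */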

static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscScalar   *v;
  const PetscInt m = A->rmap->n, N = A->cmap->N;
  PetscInt       lda;
  PetscCount     i, j;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(A, &lda));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
    for (i = 0; i < m; i++) {
      col[i] = 0;
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
    }
    for (j = 1; j < N; j++) {
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
      PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
    }
    if (lda == m) mumps->val = v;
    else {
      PetscCall(PetscMalloc1(m * N, &mumps->val));
      mumps->val_alloc = mumps->val;
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = m * N;
  } else {
    if (lda == m && !mumps->nest_vals) mumps->val = v;
    else {
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a
// MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated
// and its rows and columns permuted
// TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
{
  Mat         A;
  PetscScalar s[2];
  PetscBool   isTrans, isHTrans, compare;

  PetscFunctionBegin;
  do {
    PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
    if (isTrans) {
      PetscCall(MatTransposeGetMat(*sub, &A));
      isHTrans = PETSC_FALSE;
    } else {
      PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
      if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
    }
    compare = (PetscBool)(isTrans || isHTrans);
    if (compare) {
      if (vshift && vscale) {
        PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
        if (!*conjugate) {
          *vshift += s[0] * *vscale;
          *vscale *= s[1];
        } else {
          *vshift += PetscConj(s[0]) * *vscale;
          *vscale *= PetscConj(s[1]);
        }
      }
      if (swap) *swap = (PetscBool)!*swap;
      if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
      *sub = A;
    }
  } while (compare);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
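
  /* The MATNEST case flattens all sub-blocks into one global COO list: each block is converted with the
     routine matching its type, its local row/column indices are mapped to the outer (nest) numbering via
     the ISs returned by MatNestGetISs(), and, for Cholesky (sym != 0), only blocks with c >= r are visited */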

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscCount     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
          MatInfo   info;

          PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
          PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

          if (chol) {
            if (r == c) {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            } else {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub  = mats[r][c];
        const PetscInt *ridx = rows_idx[r];
        const PetscInt *cidx = cols_idx[c];
        PetscScalar     vscale = 1.0, vshift = 0.0;
        PetscInt        rst;
        PetscSF         csf;
        PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
        PetscLayout     cmap;
        PetscInt        innz;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscIntCast(mumps->nnz, &innz));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        if (conjugate) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
        } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool   conjugate = PETSC_FALSE;
        Mat         sub       = mats[r][c];
        PetscScalar vscale = 1.0, vshift = 0.0;
        PetscInt    midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (conjugate) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
        }
      }
    }
    mumps->val = oval;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = (PetscMUMPSInt)m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar *)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
  mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
                                   &cite1));
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1481 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1482 &cite2)); 1483 1484 PetscCall(VecFlag(x, A->factorerrortype)); 1485 if (A->factorerrortype) { 1486 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1487 PetscFunctionReturn(PETSC_SUCCESS); 1488 } 1489 1490 mumps->id.nrhs = 1; 1491 if (mumps->petsc_size > 1) { 1492 if (mumps->ICNTL20 == 10) { 1493 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1494 PetscCall(VecGetArrayRead(b, &rarray)); 1495 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1496 } else { 1497 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1498 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1499 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1500 if (!mumps->myid) { 1501 PetscCall(VecGetArray(mumps->b_seq, &array)); 1502 mumps->id.rhs = (MumpsScalar *)array; 1503 } 1504 } 1505 } else { /* petsc_size == 1 */ 1506 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1507 PetscCall(VecCopy(b, x)); 1508 PetscCall(VecGetArray(x, &array)); 1509 mumps->id.rhs = (MumpsScalar *)array; 1510 } 1511 1512 /* 1513 handle condensation step of Schur complement (if any) 1514 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1515 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1516 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 
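     (Per the MUMPS users manual, ICNTL(26)=1 restricts the solve phase to the condensation/reduction of the right-hand side onto the Schur variables, while ICNTL(26)=2 performs the expansion of the solution back to the full set of unknowns; the second_solve logic below relies on that pair of phases when ICNTL(26) was left at its default.)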
1517 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1518 */ 1519 if (mumps->id.size_schur > 0) { 1520 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1521 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1522 second_solve = PETSC_TRUE; 1523 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1524 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1525 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1526 } 1527 /* solve phase */ 1528 mumps->id.job = JOB_SOLVE; 1529 PetscMUMPS_c(mumps); 1530 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1531 1532 /* handle expansion step of Schur complement (if any) */ 1533 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1534 else if (mumps->id.ICNTL(26) == 1) { 1535 PetscCall(MatMumpsSolveSchur_Private(A)); 1536 for (i = 0; i < mumps->id.size_schur; ++i) { 1537 #if !defined(PETSC_USE_COMPLEX) 1538 PetscScalar val = mumps->id.redrhs[i]; 1539 #else 1540 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1541 #endif 1542 array[mumps->id.listvar_schur[i] - 1] = val; 1543 } 1544 } 1545 1546 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */ 1547 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1548 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1549 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1550 } 1551 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1552 PetscInt *isol2_loc = NULL; 1553 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1554 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1555 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1556 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1557 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1558 PetscCall(ISDestroy(&is_iden)); 1559 PetscCall(ISDestroy(&is_petsc)); 1560 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1561 } 1562 1563 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1564 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1565 } 1566 1567 if (mumps->petsc_size > 1) { 1568 if (mumps->ICNTL20 == 10) { 1569 PetscCall(VecRestoreArrayRead(b, &rarray)); 1570 } else if (!mumps->myid) { 1571 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1572 } 1573 } else PetscCall(VecRestoreArray(x, &array)); 1574 1575 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1576 PetscFunctionReturn(PETSC_SUCCESS); 1577 } 1578 1579 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1580 { 1581 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1582 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1583 1584 PetscFunctionBegin; 1585 mumps->id.ICNTL(9) = 0; 1586 PetscCall(MatSolve_MUMPS(A, b, x)); 1587 mumps->id.ICNTL(9) = value; 1588 PetscFunctionReturn(PETSC_SUCCESS); 1589 } 1590 1591 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1592 { 1593 Mat Bt = NULL; 1594 PetscBool denseX, denseB, flg, flgT; 1595 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1596 PetscInt i, nrhs, M, nrhsM; 1597 PetscScalar *array; 1598 const PetscScalar *rbray; 1599 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1600 PetscMUMPSInt *isol_loc, *isol_loc_save; 1601 PetscScalar *bray, *sol_loc, *sol_loc_save; 1602 IS is_to, is_from; 1603 PetscInt k, proc, j, m, myrstart; 1604 const PetscInt *rstart; 1605 Vec v_mpi, msol_loc; 1606 VecScatter scat_sol; 1607 Vec b_seq; 1608 VecScatter scat_rhs; 1609 PetscScalar *aa; 1610 PetscInt spnr, *ia, *ja; 1611 Mat_MPIAIJ *b = NULL; 1612 1613 PetscFunctionBegin; 1614 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1615 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1616 1617 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1618 if (denseB) { 1619 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1620 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1621 } else { /* sparse B */ 1622 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1623 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1624 PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1625 PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 1626 /* input B is transpose of actual RHS matrix, 1627 because mumps requires sparse compressed COLUMN storage! 
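     Passing the CSR row pointers and column indices of Bt to irhs_ptr/irhs_sparse below therefore hands MUMPS exactly the compressed-column description of the untransposed right-hand side, since the CSR structure of B^T is the CSC structure of B.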
See MatMatTransposeSolve_MUMPS() */ 1628 PetscCall(MatTransposeGetMat(B, &Bt)); 1629 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1630 } 1631 1632 PetscCall(MatGetSize(B, &M, &nrhs)); 1633 PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); 1634 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 1635 mumps->id.lrhs = (PetscMUMPSInt)M; 1636 mumps->id.rhs = NULL; 1637 1638 if (mumps->petsc_size == 1) { 1639 PetscScalar *aa; 1640 PetscInt spnr, *ia, *ja; 1641 PetscBool second_solve = PETSC_FALSE; 1642 1643 PetscCall(MatDenseGetArray(X, &array)); 1644 mumps->id.rhs = (MumpsScalar *)array; 1645 1646 if (denseB) { 1647 /* copy B to X */ 1648 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1649 PetscCall(PetscArraycpy(array, rbray, nrhsM)); 1650 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1651 } else { /* sparse B */ 1652 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1653 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1654 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1655 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1656 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1657 } 1658 /* handle condensation step of Schur complement (if any) */ 1659 if (mumps->id.size_schur > 0) { 1660 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1661 second_solve = PETSC_TRUE; 1662 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1663 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1664 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1665 } 1666 /* solve phase */ 1667 mumps->id.job = JOB_SOLVE; 1668 PetscMUMPS_c(mumps); 1669 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1670 1671 /* handle expansion step of Schur complement (if any) */ 1672 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1673 else if (mumps->id.ICNTL(26) == 1) { 1674 PetscCall(MatMumpsSolveSchur_Private(A)); 1675 for (j = 0; j < nrhs; ++j) 1676 for (i = 0; i < mumps->id.size_schur; ++i) { 1677 #if !defined(PETSC_USE_COMPLEX) 1678 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1679 #else 1680 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1681 #endif 1682 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1683 } 1684 } 1685 if (!denseB) { /* sparse B */ 1686 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1687 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1688 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1689 } 1690 PetscCall(MatDenseRestoreArray(X, &array)); 1691 PetscFunctionReturn(PETSC_SUCCESS); 1692 } 1693 1694 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
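     This applies to the sparse-RHS path and to dense right-hand sides when ICNTL(20)=0; with ICNTL(20)=10 (MUMPS 5.3.0+ and an MPI without the Fortran MPI_IN_PLACE issues noted in MatSetFromOptions_MUMPS()) the dense columns are instead passed in distributed form through rhs_loc by MatMumpsSetUpDistRHSInfo().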
*/ 1695 PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1696 1697 /* create msol_loc to hold mumps local solution */ 1698 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1699 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1700 1701 lsol_loc = mumps->id.lsol_loc; 1702 PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */ 1703 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1704 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1705 mumps->id.isol_loc = isol_loc; 1706 1707 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1708 1709 if (denseB) { 1710 if (mumps->ICNTL20 == 10) { 1711 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1712 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1713 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1714 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1715 PetscCall(MatGetLocalSize(B, &m, NULL)); 1716 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); 1717 } else { 1718 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1719 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1720 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1721 0, re-arrange B into desired order, which is a local operation. 1722 */ 1723 1724 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1725 /* wrap dense rhs matrix B into a vector v_mpi */ 1726 PetscCall(MatGetLocalSize(B, &m, NULL)); 1727 PetscCall(MatDenseGetArray(B, &bray)); 1728 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, (const PetscScalar *)bray, &v_mpi)); 1729 PetscCall(MatDenseRestoreArray(B, &bray)); 1730 1731 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
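     The centralized array must hold B column by column, i.e. entry (i,j) of B at position j*M+i, which is the mapping encoded by the idx[] array built below. For illustration only (not code from this file): with M=4 rows split 2+2 over two ranks and nrhs=2, the gathered v_mpi ordering B(0,0) B(1,0) B(0,1) B(1,1) B(2,0) B(3,0) B(2,1) B(3,1) is routed by idx = {0,1,4,5,2,3,6,7} into the column-major b_seq.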
*/ 1732 if (!mumps->myid) { 1733 PetscInt *idx; 1734 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1735 PetscCall(PetscMalloc1(nrhsM, &idx)); 1736 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1737 for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) { 1738 for (j = 0; j < nrhs; j++) { 1739 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1740 } 1741 } 1742 1743 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq)); 1744 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to)); 1745 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from)); 1746 } else { 1747 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1748 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1749 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1750 } 1751 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1752 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1753 PetscCall(ISDestroy(&is_to)); 1754 PetscCall(ISDestroy(&is_from)); 1755 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1756 1757 if (!mumps->myid) { /* define rhs on the host */ 1758 PetscCall(VecGetArray(b_seq, &bray)); 1759 mumps->id.rhs = (MumpsScalar *)bray; 1760 PetscCall(VecRestoreArray(b_seq, &bray)); 1761 } 1762 } 1763 } else { /* sparse B */ 1764 b = (Mat_MPIAIJ *)Bt->data; 1765 1766 /* wrap dense X into a vector v_mpi */ 1767 PetscCall(MatGetLocalSize(X, &m, NULL)); 1768 PetscCall(MatDenseGetArray(X, &bray)); 1769 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, (const PetscScalar *)bray, &v_mpi)); 1770 PetscCall(MatDenseRestoreArray(X, &bray)); 1771 1772 if (!mumps->myid) { 1773 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1774 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1775 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1776 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1777 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1778 } else { 1779 mumps->id.irhs_ptr = NULL; 1780 mumps->id.irhs_sparse = NULL; 1781 mumps->id.nz_rhs = 0; 1782 mumps->id.rhs_sparse = NULL; 1783 } 1784 } 1785 1786 /* solve phase */ 1787 mumps->id.job = JOB_SOLVE; 1788 PetscMUMPS_c(mumps); 1789 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1790 1791 /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */ 1792 PetscCall(MatDenseGetArray(X, &array)); 1793 PetscCall(VecPlaceArray(v_mpi, array)); 1794 1795 /* create scatter scat_sol */ 1796 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1797 /* iidx: index for scatter mumps solution to PETSc X */ 1798 1799 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1800 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1801 for (i = 0; i < lsol_loc; i++) { 1802 isol_loc[i] -= 1; /* change Fortran style to C style. 
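     The loop below locates the owning process p of global row isol_loc[i] and converts solution entry (isol_loc[i], j) into its offset rstart[p]*nrhs + j*m + (isol_loc[i] - rstart[p]) within v_mpi, which concatenates the column-major local blocks of X.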
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1803 1804 for (proc = 0; proc < mumps->petsc_size; proc++) { 1805 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1806 myrstart = rstart[proc]; 1807 k = isol_loc[i] - myrstart; /* local index on 1st column of PETSc vector X */ 1808 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to PETSc index in X */ 1809 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1810 break; 1811 } 1812 } 1813 1814 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1815 } 1816 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1817 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1818 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1819 PetscCall(ISDestroy(&is_from)); 1820 PetscCall(ISDestroy(&is_to)); 1821 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1822 PetscCall(MatDenseRestoreArray(X, &array)); 1823 1824 /* free spaces */ 1825 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1826 mumps->id.isol_loc = isol_loc_save; 1827 1828 PetscCall(PetscFree2(sol_loc, isol_loc)); 1829 PetscCall(PetscFree(idxx)); 1830 PetscCall(VecDestroy(&msol_loc)); 1831 PetscCall(VecDestroy(&v_mpi)); 1832 if (!denseB) { 1833 if (!mumps->myid) { 1834 b = (Mat_MPIAIJ *)Bt->data; 1835 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1836 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1837 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1838 } 1839 } else { 1840 if (mumps->ICNTL20 == 0) { 1841 PetscCall(VecDestroy(&b_seq)); 1842 PetscCall(VecScatterDestroy(&scat_rhs)); 1843 } 1844 } 1845 PetscCall(VecScatterDestroy(&scat_sol)); 1846 PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1847 PetscFunctionReturn(PETSC_SUCCESS); 1848 } 1849 1850 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1851 { 1852 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1853 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1854 1855 PetscFunctionBegin; 1856 mumps->id.ICNTL(9) = 0; 1857 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1858 mumps->id.ICNTL(9) = value; 1859 PetscFunctionReturn(PETSC_SUCCESS); 1860 } 1861 1862 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1863 { 1864 PetscBool flg; 1865 Mat B; 1866 1867 PetscFunctionBegin; 1868 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1869 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1870 1871 /* Create B=Bt^T that uses Bt's data structure */ 1872 PetscCall(MatCreateTranspose(Bt, &B)); 1873 1874 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1875 PetscCall(MatDestroy(&B)); 1876 PetscFunctionReturn(PETSC_SUCCESS); 1877 } 1878 1879 #if !defined(PETSC_USE_COMPLEX) 1880 /* 1881 input: 1882 F: numeric factor 1883 output: 1884 nneg: total number of negative pivots 1885 nzero: total number of zero pivots 1886 npos: (global dimension of F) - nneg - nzero 1887 */ 1888 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1889 { 1890 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1891 PetscMPIInt size; 1892 1893 PetscFunctionBegin; 1894 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1895 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1896 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1897 1898 if (nneg) *nneg = mumps->id.INFOG(12); 1899 if (nzero || npos) { 1900 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1901 if (nzero) *nzero = mumps->id.INFOG(28); 1902 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1903 } 1904 PetscFunctionReturn(PETSC_SUCCESS); 1905 } 1906 #endif 1907 1908 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1909 { 1910 PetscMPIInt nreqs; 1911 PetscMUMPSInt *irn, *jcn; 1912 PetscMPIInt count; 1913 PetscCount totnnz, remain; 1914 const PetscInt osize = mumps->omp_comm_size; 1915 PetscScalar *val; 1916 1917 PetscFunctionBegin; 1918 if (osize > 1) { 1919 if (reuse == MAT_INITIAL_MATRIX) { 1920 /* master first gathers counts of nonzeros to receive */ 1921 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1922 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1923 1924 /* Then each computes number of send/recvs */ 1925 if (mumps->is_omp_master) { 1926 /* Start from 1 since self communication is not done in MPI */ 1927 nreqs = 0; 1928 for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1929 } else { 1930 nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX)); 1931 } 1932 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1933 1934 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1935 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1936 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1937 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
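     Each transfer is therefore sliced into chunks of at most PETSC_MPI_INT_MAX entries; irn, jcn and val travel as separate messages sharing one tag (only val when the matrix is reused), and the master posts the matching receives into the enlarged arrays allocated on its side.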
1938 */ 1939 nreqs = 0; /* counter for actual send/recvs */ 1940 if (mumps->is_omp_master) { 1941 totnnz = 0; 1942 1943 for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1944 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1945 PetscCall(PetscMalloc1(totnnz, &val)); 1946 1947 /* Self communication */ 1948 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1949 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1950 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1951 1952 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1953 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1954 PetscCall(PetscFree(mumps->val_alloc)); 1955 mumps->nnz = totnnz; 1956 mumps->irn = irn; 1957 mumps->jcn = jcn; 1958 mumps->val = mumps->val_alloc = val; 1959 1960 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1961 jcn += mumps->recvcount[0]; 1962 val += mumps->recvcount[0]; 1963 1964 /* Remote communication */ 1965 for (PetscMPIInt i = 1; i < osize; i++) { 1966 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 1967 remain = mumps->recvcount[i] - count; 1968 while (count > 0) { 1969 PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1970 PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1971 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1972 irn += count; 1973 jcn += count; 1974 val += count; 1975 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1976 remain -= count; 1977 } 1978 } 1979 } else { 1980 irn = mumps->irn; 1981 jcn = mumps->jcn; 1982 val = mumps->val; 1983 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 1984 remain = mumps->nnz - count; 1985 while (count > 0) { 1986 PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1987 PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1988 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1989 irn += count; 1990 jcn += count; 1991 val += count; 1992 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 1993 remain -= count; 1994 } 1995 } 1996 } else { 1997 nreqs = 0; 1998 if (mumps->is_omp_master) { 1999 val = mumps->val + mumps->recvcount[0]; 2000 for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 2001 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2002 remain = mumps->recvcount[i] - count; 2003 while (count > 0) { 2004 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2005 val += count; 2006 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2007 remain -= count; 2008 } 2009 } 2010 } else { 2011 val = mumps->val; 2012 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2013 remain = mumps->nnz - count; 2014 while (count > 0) { 2015 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2016 val += count; 2017 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2018 remain -= count; 2019 } 2020 } 2021 } 2022 
PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 2023 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 2024 } 2025 PetscFunctionReturn(PETSC_SUCCESS); 2026 } 2027 2028 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info) 2029 { 2030 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2031 PetscBool isMPIAIJ; 2032 2033 PetscFunctionBegin; 2034 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { 2035 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2036 PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2037 PetscFunctionReturn(PETSC_SUCCESS); 2038 } 2039 2040 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 2041 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2042 2043 /* numerical factorization phase */ 2044 mumps->id.job = JOB_FACTNUMERIC; 2045 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2046 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 2047 } else { 2048 mumps->id.a_loc = (MumpsScalar *)mumps->val; 2049 } 2050 PetscMUMPS_c(mumps); 2051 if (mumps->id.INFOG(1) < 0) { 2052 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2053 if (mumps->id.INFOG(1) == -10) { 2054 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2055 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2056 } else if (mumps->id.INFOG(1) == -13) { 2057 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2058 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2059 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2060 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2061 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2062 } else { 2063 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2064 F->factorerrortype = MAT_FACTOR_OTHER; 2065 } 2066 } 2067 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 2068 2069 F->assembled = PETSC_TRUE; 2070 2071 if (F->schur) { /* reset Schur status to unfactored */ 2072 #if defined(PETSC_HAVE_CUDA) 2073 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 2074 #endif 2075 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2076 mumps->id.ICNTL(19) = 2; 2077 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2078 } 2079 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2080 } 2081 2082 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2083 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2084 2085 if (!mumps->is_omp_master)
mumps->id.INFO(23) = 0; 2086 if (mumps->petsc_size > 1) { 2087 PetscInt lsol_loc; 2088 PetscScalar *sol_loc; 2089 2090 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 2091 2092 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2093 if (mumps->x_seq) { 2094 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2095 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2096 PetscCall(VecDestroy(&mumps->x_seq)); 2097 } 2098 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2099 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2100 mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc; 2101 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2102 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2103 } 2104 PetscCall(PetscLogFlops((double)mumps->id.RINFO(2))); 2105 PetscFunctionReturn(PETSC_SUCCESS); 2106 } 2107 2108 /* Sets MUMPS options from the options database */ 2109 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2110 { 2111 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2112 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2113 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2114 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2115 MumpsScalar *arr; 2116 2117 PetscFunctionBegin; 2118 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2119 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2120 PetscInt nthreads = 0; 2121 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2122 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2123 2124 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2125 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2126 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2127 2128 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2129 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2130 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2131 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2132 if (mumps->use_petsc_omp_support) { 2133 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2134 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2135 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2136 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2137 #else 2138 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2139 ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 2140 #endif 2141 } else { 2142 mumps->omp_comm = PETSC_COMM_SELF; 2143 mumps->mumps_comm = mumps->petsc_comm; 2144 mumps->is_omp_master = PETSC_TRUE; 2145 } 2146 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2147 mumps->reqs = NULL; 2148 mumps->tag = 0; 2149 2150 if (mumps->mumps_comm != MPI_COMM_NULL) { 2151 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2152 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2153 MPI_Comm comm; 2154 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2155 mumps->mumps_comm = comm; 2156 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2157 } 2158 2159 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2160 mumps->id.job = JOB_INIT; 2161 mumps->id.par = 1; /* host participates factorizaton and solve */ 2162 mumps->id.sym = mumps->sym; 2163 2164 size = mumps->id.size_schur; 2165 arr = mumps->id.schur; 2166 listvar_schur = mumps->id.listvar_schur; 2167 PetscMUMPS_c(mumps); 2168 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2169 2170 /* set PETSc-MUMPS default options - override MUMPS default */ 2171 mumps->id.ICNTL(3) = 0; 2172 mumps->id.ICNTL(4) = 0; 2173 if (mumps->petsc_size == 1) { 2174 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2175 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2176 } else { 2177 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2178 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2179 } 2180 2181 /* restore cached ICNTL and CNTL values */ 2182 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2183 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2184 PetscCall(PetscFree(mumps->ICNTL_pre)); 2185 PetscCall(PetscFree(mumps->CNTL_pre)); 2186 2187 if (schur) { 2188 mumps->id.size_schur = size; 2189 mumps->id.schur_lld = size; 2190 mumps->id.schur = arr; 2191 mumps->id.listvar_schur = listvar_schur; 2192 if (mumps->petsc_size > 1) { 2193 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2194 2195 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2196 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2197 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2198 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2199 } else { 2200 if (F->factortype == MAT_FACTOR_LU) { 2201 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2202 } else { 2203 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2204 } 2205 } 2206 mumps->id.ICNTL(26) = -1; 2207 } 2208 2209 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2210 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
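     The two broadcasts below copy the full icntl[] and cntl[] arrays over omp_comm, so the slave ranks see the values the master holds after JOB_INIT and the adjustments above. For reference, a control can also be set programmatically from user code (not part of this file) before factorization, e.g. PetscCall(MatMumpsSetIcntl(F, 24, 1)), which is the programmatic counterpart of the -mat_mumps_icntl_24 1 option handled below.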
2211 */ 2212 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2213 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2214 2215 mumps->scat_rhs = NULL; 2216 mumps->scat_sol = NULL; 2217 } 2218 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2219 if (flg) mumps->id.ICNTL(1) = icntl; 2220 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2221 if (flg) mumps->id.ICNTL(2) = icntl; 2222 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2223 if (flg) mumps->id.ICNTL(3) = icntl; 2224 2225 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2226 if (flg) mumps->id.ICNTL(4) = icntl; 2227 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2228 2229 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2230 if (flg) mumps->id.ICNTL(6) = icntl; 2231 2232 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2233 if (flg) { 2234 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2235 mumps->id.ICNTL(7) = icntl; 2236 } 2237 2238 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2239 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2240 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2241 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2242 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2243 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2244 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2245 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2246 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs; 2247 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2248 if (flg) { 2249 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, 
PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2250 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2251 } 2252 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2253 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2254 PetscCall(MatDestroy(&F->schur)); 2255 PetscCall(MatMumpsResetSchur_Private(mumps)); 2256 } 2257 2258 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2259 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2260 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2261 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2262 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2263 In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 2264 */ 2265 mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */ 2266 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI) 2267 mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */ 2268 #endif 2269 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2270 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10", (int)mumps->ICNTL20); 2271 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2272 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2273 #endif 2274 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2275 2276 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2277 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2278 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2279 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2280 2281 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2282 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2283 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2284 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2285 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2286 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2287 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2288 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2289 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2290 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2291 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2292 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL)); 2293 
PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2294 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL)); 2295 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL)); 2296 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2297 2298 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2299 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2300 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2301 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2302 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2303 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2304 2305 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2306 2307 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2308 if (ninfo) { 2309 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2310 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2311 mumps->ninfo = ninfo; 2312 for (i = 0; i < ninfo; i++) { 2313 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2314 mumps->info[i] = info[i]; 2315 } 2316 } 2317 PetscOptionsEnd(); 2318 PetscFunctionReturn(PETSC_SUCCESS); 2319 } 2320 2321 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2322 { 2323 PetscFunctionBegin; 2324 if (mumps->id.INFOG(1) < 0) { 2325 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2326 if (mumps->id.INFOG(1) == -6) { 2327 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2328 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2329 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2330 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2331 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2332 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2333 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2334 } else { 2335 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), 
mumps->id.INFO(2))); 2336 F->factorerrortype = MAT_FACTOR_OTHER; 2337 } 2338 } 2339 if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 2340 PetscFunctionReturn(PETSC_SUCCESS); 2341 } 2342 2343 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2344 { 2345 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2346 Vec b; 2347 const PetscInt M = A->rmap->N; 2348 2349 PetscFunctionBegin; 2350 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2351 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2352 PetscFunctionReturn(PETSC_SUCCESS); 2353 } 2354 2355 /* Set MUMPS options from the options database */ 2356 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2357 2358 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2359 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2360 2361 /* analysis phase */ 2362 mumps->id.job = JOB_FACTSYMBOLIC; 2363 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2364 switch (mumps->id.ICNTL(18)) { 2365 case 0: /* centralized assembled matrix input */ 2366 if (!mumps->myid) { 2367 mumps->id.nnz = mumps->nnz; 2368 mumps->id.irn = mumps->irn; 2369 mumps->id.jcn = mumps->jcn; 2370 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2371 if (r && mumps->id.ICNTL(7) == 7) { 2372 mumps->id.ICNTL(7) = 1; 2373 if (!mumps->myid) { 2374 const PetscInt *idx; 2375 PetscInt i; 2376 2377 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2378 PetscCall(ISGetIndices(r, &idx)); 2379 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */ 2380 PetscCall(ISRestoreIndices(r, &idx)); 2381 } 2382 } 2383 } 2384 break; 2385 case 3: /* distributed assembled matrix input (size>1) */ 2386 mumps->id.nnz_loc = mumps->nnz; 2387 mumps->id.irn_loc = mumps->irn; 2388 mumps->id.jcn_loc = mumps->jcn; 2389 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2390 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
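     (b_seq created here has nonzero length only on rank 0, which is where MUMPS expects the centralized right-hand side to live.)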
Create scatter scat_rhs for repeated use in MatSolve() */ 2391 PetscCall(MatCreateVecs(A, NULL, &b)); 2392 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2393 PetscCall(VecDestroy(&b)); 2394 } 2395 break; 2396 } 2397 PetscMUMPS_c(mumps); 2398 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2399 2400 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2401 F->ops->solve = MatSolve_MUMPS; 2402 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2403 F->ops->matsolve = MatMatSolve_MUMPS; 2404 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2405 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2406 2407 mumps->matstruc = SAME_NONZERO_PATTERN; 2408 PetscFunctionReturn(PETSC_SUCCESS); 2409 } 2410 2411 /* Note the PETSc r and c permutations are ignored */ 2412 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2413 { 2414 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2415 Vec b; 2416 const PetscInt M = A->rmap->N; 2417 2418 PetscFunctionBegin; 2419 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2420 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2421 PetscFunctionReturn(PETSC_SUCCESS); 2422 } 2423 2424 /* Set MUMPS options from the options database */ 2425 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2426 2427 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2428 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2429 2430 /* analysis phase */ 2431 mumps->id.job = JOB_FACTSYMBOLIC; 2432 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2433 switch (mumps->id.ICNTL(18)) { 2434 case 0: /* centralized assembled matrix input */ 2435 if (!mumps->myid) { 2436 mumps->id.nnz = mumps->nnz; 2437 mumps->id.irn = mumps->irn; 2438 mumps->id.jcn = mumps->jcn; 2439 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2440 } 2441 break; 2442 case 3: /* distributed assembled matrix input (size>1) */ 2443 mumps->id.nnz_loc = mumps->nnz; 2444 mumps->id.irn_loc = mumps->irn; 2445 mumps->id.jcn_loc = mumps->jcn; 2446 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2447 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2448 PetscCall(MatCreateVecs(A, NULL, &b)); 2449 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2450 PetscCall(VecDestroy(&b)); 2451 } 2452 break; 2453 } 2454 PetscMUMPS_c(mumps); 2455 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2456 2457 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2458 F->ops->solve = MatSolve_MUMPS; 2459 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2460 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2461 2462 mumps->matstruc = SAME_NONZERO_PATTERN; 2463 PetscFunctionReturn(PETSC_SUCCESS); 2464 } 2465 2466 /* Note the PETSc r permutation and factor info are ignored */ 2467 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info) 2468 { 2469 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2470 Vec b; 2471 const PetscInt M = A->rmap->N; 2472 2473 PetscFunctionBegin; 2474 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2475 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2476 PetscFunctionReturn(PETSC_SUCCESS); 2477 } 2478 2479 /* Set MUMPS options from the options database */ 2480 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2481 2482 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2483 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2484 2485 /* analysis phase */ 2486 mumps->id.job = JOB_FACTSYMBOLIC; 2487 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2488 switch (mumps->id.ICNTL(18)) { 2489 case 0: /* centralized assembled matrix input */ 2490 if (!mumps->myid) { 2491 mumps->id.nnz = mumps->nnz; 2492 mumps->id.irn = mumps->irn; 2493 mumps->id.jcn = mumps->jcn; 2494 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2495 } 2496 break; 2497 case 3: /* distributed assembled matrix input (size>1) */ 2498 mumps->id.nnz_loc = mumps->nnz; 2499 mumps->id.irn_loc = mumps->irn; 2500 mumps->id.jcn_loc = mumps->jcn; 2501 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2502 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2503 PetscCall(MatCreateVecs(A, NULL, &b)); 2504 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2505 PetscCall(VecDestroy(&b)); 2506 } 2507 break; 2508 } 2509 PetscMUMPS_c(mumps); 2510 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2511 2512 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2513 F->ops->solve = MatSolve_MUMPS; 2514 F->ops->solvetranspose = MatSolve_MUMPS; 2515 F->ops->matsolve = MatMatSolve_MUMPS; 2516 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2517 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2518 #if defined(PETSC_USE_COMPLEX) 2519 F->ops->getinertia = NULL; 2520 #else 2521 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2522 #endif 2523 2524 mumps->matstruc = SAME_NONZERO_PATTERN; 2525 PetscFunctionReturn(PETSC_SUCCESS); 2526 } 2527 2528 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2529 { 2530 PetscBool iascii; 2531 PetscViewerFormat format; 2532 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2533 2534 PetscFunctionBegin; 2535 /* check if matrix is mumps type */ 2536 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2537 2538 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2539 if (iascii) { 2540 PetscCall(PetscViewerGetFormat(viewer, &format)); 2541 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2542 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2543 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2544 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2545 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2546 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2547 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2548 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2549 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2550 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2551 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2552 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2553 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2554 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2555 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2556 if (mumps->id.ICNTL(11) > 0) { 2557 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)mumps->id.RINFOG(4))); 2558 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)mumps->id.RINFOG(5))); 2559 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)mumps->id.RINFOG(6))); 2560 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)mumps->id.RINFOG(7), (double)mumps->id.RINFOG(8))); 2561 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", 
(double)mumps->id.RINFOG(9))); 2562 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)mumps->id.RINFOG(10), (double)mumps->id.RINFOG(11))); 2563 } 2564 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2565 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2566 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2567 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2568 /* ICNTL(15-17) not used */ 2569 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2570 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2571 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2572 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2573 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2574 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2575 2576 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2577 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2578 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2579 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2580 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2581 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2582 2583 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2584 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2585 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2586 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2587 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2588 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37))); 2589 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2590 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48))); 2591 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56))); 2592 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2593 2594 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", 
(double)mumps->id.CNTL(1))); 2595 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)mumps->id.CNTL(2))); 2596 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)mumps->id.CNTL(3))); 2597 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)mumps->id.CNTL(4))); 2598 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)mumps->id.CNTL(5))); 2599 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)mumps->id.CNTL(7))); 2600 2601 /* information local to each processor */ 2602 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2603 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2604 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(1))); 2605 PetscCall(PetscViewerFlush(viewer)); 2606 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2607 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(2))); 2608 PetscCall(PetscViewerFlush(viewer)); 2609 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2610 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(3))); 2611 PetscCall(PetscViewerFlush(viewer)); 2612 2613 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2614 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2615 PetscCall(PetscViewerFlush(viewer)); 2616 2617 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2618 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2619 PetscCall(PetscViewerFlush(viewer)); 2620 2621 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2622 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2623 PetscCall(PetscViewerFlush(viewer)); 2624 2625 if (mumps->ninfo && mumps->ninfo <= 80) { 2626 PetscInt i; 2627 for (i = 0; i < mumps->ninfo; i++) { 2628 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2629 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2630 PetscCall(PetscViewerFlush(viewer)); 2631 } 2632 } 2633 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2634 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : ""));

    if (mumps->myid == 0) { /* information from the host */
      PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)mumps->id.RINFOG(1)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)mumps->id.RINFOG(2)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)mumps->id.RINFOG(3)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)mumps->id.RINFOG(12), (double)mumps->id.RINFOG(13), mumps->id.INFOG(34)));

      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
      PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the
factors): %d\n", mumps->id.INFOG(20))); 2660 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2661 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2662 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2663 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2664 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2665 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2666 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2667 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2668 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2669 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2670 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2671 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2672 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2673 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2674 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2675 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2676 } 2677 } 2678 } 2679 PetscFunctionReturn(PETSC_SUCCESS); 2680 } 2681 2682 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info) 2683 { 2684 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2685 2686 PetscFunctionBegin; 2687 info->block_size = 1.0; 2688 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2689 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_unneeded = 0.0;
  info->assemblies = 0.0;
  info->mallocs = 0.0;
  info->memory = 0.0;
  info->fill_ratio_given = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt *idxs;
  PetscInt size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  mumps->id.schur = (MumpsScalar *)arr;
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

  /* MUMPS expects Fortran style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat St;
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected!
Call MatFactorSetSchurIS() to enable it"); 2738 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2739 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2740 PetscCall(MatSetType(St, MATDENSE)); 2741 PetscCall(MatSetUp(St)); 2742 PetscCall(MatDenseGetArray(St, &array)); 2743 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2744 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2745 PetscInt i, j, N = mumps->id.size_schur; 2746 for (i = 0; i < N; i++) { 2747 for (j = 0; j < N; j++) { 2748 #if !defined(PETSC_USE_COMPLEX) 2749 PetscScalar val = mumps->id.schur[i * N + j]; 2750 #else 2751 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2752 #endif 2753 array[j * N + i] = val; 2754 } 2755 } 2756 } else { /* stored by columns */ 2757 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2758 } 2759 } else { /* either full or lower-triangular (not packed) */ 2760 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2761 PetscInt i, j, N = mumps->id.size_schur; 2762 for (i = 0; i < N; i++) { 2763 for (j = i; j < N; j++) { 2764 #if !defined(PETSC_USE_COMPLEX) 2765 PetscScalar val = mumps->id.schur[i * N + j]; 2766 #else 2767 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2768 #endif 2769 array[i * N + j] = array[j * N + i] = val; 2770 } 2771 } 2772 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2773 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2774 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2775 PetscInt i, j, N = mumps->id.size_schur; 2776 for (i = 0; i < N; i++) { 2777 for (j = 0; j < i + 1; j++) { 2778 #if !defined(PETSC_USE_COMPLEX) 2779 PetscScalar val = mumps->id.schur[i * N + j]; 2780 #else 2781 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2782 #endif 2783 array[i * N + j] = array[j * N + i] = val; 2784 } 2785 } 2786 } 2787 } 2788 PetscCall(MatDenseRestoreArray(St, &array)); 2789 *S = St; 2790 PetscFunctionReturn(PETSC_SUCCESS); 2791 } 2792 2793 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2794 { 2795 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2796 2797 PetscFunctionBegin; 2798 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2799 PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2800 for (i = 0; i < nICNTL_pre; ++i) 2801 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2802 if (i == nICNTL_pre) { /* not already cached */ 2803 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2804 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2805 mumps->ICNTL_pre[0]++; 2806 } 2807 mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl; 2808 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2809 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2810 PetscFunctionReturn(PETSC_SUCCESS); 2811 } 2812 2813 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2814 { 2815 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2816 2817 PetscFunctionBegin; 2818 if (mumps->id.job == JOB_NULL) { 2819 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 2820 *ival = 0; 2821 for (i = 0; i < nICNTL_pre; ++i) { 2822 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2823 } 2824 } else *ival = mumps->id.ICNTL(icntl); 2825 PetscFunctionReturn(PETSC_SUCCESS); 2826 } 2827 2828 /*@ 2829 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 2830 2831 Logically Collective 2832 2833 Input Parameters: 2834 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 2835 . icntl - index of MUMPS parameter array `ICNTL()` 2836 - ival - value of MUMPS `ICNTL(icntl)` 2837 2838 Options Database Key: 2839 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival` 2840 2841 Level: beginner 2842 2843 Note: 2844 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 2845 2846 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2847 @*/ 2848 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2849 { 2850 PetscFunctionBegin; 2851 PetscValidType(F, 1); 2852 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2853 PetscValidLogicalCollectiveInt(F, icntl, 2); 2854 PetscValidLogicalCollectiveInt(F, ival, 3); 2855 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2856 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2857 PetscFunctionReturn(PETSC_SUCCESS); 2858 } 2859 2860 /*@ 2861 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 2862 2863 Logically Collective 2864 2865 Input Parameters: 2866 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 2867 - icntl - index of MUMPS parameter array ICNTL() 2868 2869 Output Parameter: 2870 . ival - value of MUMPS ICNTL(icntl) 2871 2872 Level: beginner 2873 2874 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2875 @*/ 2876 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2877 { 2878 PetscFunctionBegin; 2879 PetscValidType(F, 1); 2880 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2881 PetscValidLogicalCollectiveInt(F, icntl, 2); 2882 PetscAssertPointer(ival, 3); 2883 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2884 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2885 PetscFunctionReturn(PETSC_SUCCESS); 2886 } 2887 2888 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2889 { 2890 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2891 2892 PetscFunctionBegin; 2893 if (mumps->id.job == JOB_NULL) { 2894 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 2895 for (i = 0; i < nCNTL_pre; ++i) 2896 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2897 if (i == nCNTL_pre) { 2898 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2899 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2900 mumps->CNTL_pre[0]++; 2901 } 2902 mumps->CNTL_pre[1 + 2 * i] = icntl; 2903 mumps->CNTL_pre[2 + 2 * i] = val; 2904 } else mumps->id.CNTL(icntl) = val; 2905 PetscFunctionReturn(PETSC_SUCCESS); 2906 } 2907 2908 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2909 { 2910 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2911 2912 PetscFunctionBegin; 2913 if (mumps->id.job == JOB_NULL) { 2914 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2915 *val = 0.0; 2916 for (i = 0; i < nCNTL_pre; ++i) { 2917 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2918 } 2919 } else *val = mumps->id.CNTL(icntl); 2920 PetscFunctionReturn(PETSC_SUCCESS); 2921 } 2922 2923 /*@ 2924 MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 2925 2926 Logically Collective 2927 2928 Input Parameters: 2929 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 2930 . icntl - index of MUMPS parameter array `CNTL()` 2931 - val - value of MUMPS `CNTL(icntl)` 2932 2933 Options Database Key: 2934 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2935 2936 Level: beginner 2937 2938 Note: 2939 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 2940 2941 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2942 @*/ 2943 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2944 { 2945 PetscFunctionBegin; 2946 PetscValidType(F, 1); 2947 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2948 PetscValidLogicalCollectiveInt(F, icntl, 2); 2949 PetscValidLogicalCollectiveReal(F, val, 3); 2950 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2951 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2952 PetscFunctionReturn(PETSC_SUCCESS); 2953 } 2954 2955 /*@ 2956 MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 2957 2958 Logically Collective 2959 2960 Input Parameters: 2961 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 2962 - icntl - index of MUMPS parameter array CNTL() 2963 2964 Output Parameter: 2965 . 
val - value of MUMPS CNTL(icntl) 2966 2967 Level: beginner 2968 2969 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2970 @*/ 2971 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2972 { 2973 PetscFunctionBegin; 2974 PetscValidType(F, 1); 2975 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2976 PetscValidLogicalCollectiveInt(F, icntl, 2); 2977 PetscAssertPointer(val, 3); 2978 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2979 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2980 PetscFunctionReturn(PETSC_SUCCESS); 2981 } 2982 2983 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2984 { 2985 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2986 2987 PetscFunctionBegin; 2988 *info = mumps->id.INFO(icntl); 2989 PetscFunctionReturn(PETSC_SUCCESS); 2990 } 2991 2992 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2993 { 2994 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2995 2996 PetscFunctionBegin; 2997 *infog = mumps->id.INFOG(icntl); 2998 PetscFunctionReturn(PETSC_SUCCESS); 2999 } 3000 3001 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 3002 { 3003 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3004 3005 PetscFunctionBegin; 3006 *rinfo = mumps->id.RINFO(icntl); 3007 PetscFunctionReturn(PETSC_SUCCESS); 3008 } 3009 3010 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 3011 { 3012 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3013 3014 PetscFunctionBegin; 3015 *rinfog = mumps->id.RINFOG(icntl); 3016 PetscFunctionReturn(PETSC_SUCCESS); 3017 } 3018 3019 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 3020 { 3021 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3022 3023 PetscFunctionBegin; 3024 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 3025 *size = 0; 3026 *array = NULL; 3027 if (!mumps->myid) { 3028 *size = mumps->id.INFOG(28); 3029 PetscCall(PetscMalloc1(*size, array)); 3030 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 3031 } 3032 PetscFunctionReturn(PETSC_SUCCESS); 3033 } 3034 3035 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 3036 { 3037 Mat Bt = NULL, Btseq = NULL; 3038 PetscBool flg; 3039 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3040 PetscScalar *aa; 3041 PetscInt spnr, *ia, *ja, M, nrhs; 3042 3043 PetscFunctionBegin; 3044 PetscAssertPointer(spRHS, 2); 3045 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 3046 PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 3047 PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 3048 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 3049 3050 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 3051 3052 if (mumps->petsc_size > 1) { 3053 
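    /* In parallel, Bt is a MATMPIAIJ matrix; use its local diagonal block, since only rank 0 hands the sparse RHS arrays to MUMPS below */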
Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 3054 Btseq = b->A; 3055 } else { 3056 Btseq = Bt; 3057 } 3058 3059 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 3060 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 3061 PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs)); 3062 mumps->id.rhs = NULL; 3063 3064 if (!mumps->myid) { 3065 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 3066 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3067 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3068 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3069 mumps->id.rhs_sparse = (MumpsScalar *)aa; 3070 } else { 3071 mumps->id.irhs_ptr = NULL; 3072 mumps->id.irhs_sparse = NULL; 3073 mumps->id.nz_rhs = 0; 3074 mumps->id.rhs_sparse = NULL; 3075 } 3076 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3077 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3078 3079 /* solve phase */ 3080 mumps->id.job = JOB_SOLVE; 3081 PetscMUMPS_c(mumps); 3082 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 3083 3084 if (!mumps->myid) { 3085 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3086 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3087 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3088 } 3089 PetscFunctionReturn(PETSC_SUCCESS); 3090 } 3091 3092 /*@ 3093 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc> 3094 3095 Logically Collective 3096 3097 Input Parameter: 3098 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3099 3100 Output Parameter: 3101 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3102 3103 Level: beginner 3104 3105 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3106 @*/ 3107 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3108 { 3109 PetscFunctionBegin; 3110 PetscValidType(F, 1); 3111 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3112 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3113 PetscFunctionReturn(PETSC_SUCCESS); 3114 } 3115 3116 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3117 { 3118 Mat spRHS; 3119 3120 PetscFunctionBegin; 3121 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3122 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3123 PetscCall(MatDestroy(&spRHS)); 3124 PetscFunctionReturn(PETSC_SUCCESS); 3125 } 3126 3127 /*@ 3128 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc> 3129 3130 Logically Collective 3131 3132 Input Parameter: 3133 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3134 3135 Output Parameter: 3136 . 
spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3137 3138 Level: beginner 3139 3140 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3141 @*/ 3142 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3143 { 3144 PetscBool flg; 3145 3146 PetscFunctionBegin; 3147 PetscValidType(F, 1); 3148 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3149 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3150 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3151 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3152 PetscFunctionReturn(PETSC_SUCCESS); 3153 } 3154 3155 /*@ 3156 MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc> 3157 3158 Logically Collective 3159 3160 Input Parameters: 3161 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3162 - icntl - index of MUMPS parameter array INFO() 3163 3164 Output Parameter: 3165 . ival - value of MUMPS INFO(icntl) 3166 3167 Level: beginner 3168 3169 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3170 @*/ 3171 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3172 { 3173 PetscFunctionBegin; 3174 PetscValidType(F, 1); 3175 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3176 PetscAssertPointer(ival, 3); 3177 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3178 PetscFunctionReturn(PETSC_SUCCESS); 3179 } 3180 3181 /*@ 3182 MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc> 3183 3184 Logically Collective 3185 3186 Input Parameters: 3187 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3188 - icntl - index of MUMPS parameter array INFOG() 3189 3190 Output Parameter: 3191 . 
ival - value of MUMPS INFOG(icntl) 3192 3193 Level: beginner 3194 3195 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3196 @*/ 3197 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3198 { 3199 PetscFunctionBegin; 3200 PetscValidType(F, 1); 3201 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3202 PetscAssertPointer(ival, 3); 3203 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3204 PetscFunctionReturn(PETSC_SUCCESS); 3205 } 3206 3207 /*@ 3208 MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc> 3209 3210 Logically Collective 3211 3212 Input Parameters: 3213 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3214 - icntl - index of MUMPS parameter array RINFO() 3215 3216 Output Parameter: 3217 . val - value of MUMPS RINFO(icntl) 3218 3219 Level: beginner 3220 3221 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3222 @*/ 3223 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3224 { 3225 PetscFunctionBegin; 3226 PetscValidType(F, 1); 3227 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3228 PetscAssertPointer(val, 3); 3229 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3230 PetscFunctionReturn(PETSC_SUCCESS); 3231 } 3232 3233 /*@ 3234 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc> 3235 3236 Logically Collective 3237 3238 Input Parameters: 3239 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3240 - icntl - index of MUMPS parameter array RINFOG() 3241 3242 Output Parameter: 3243 . val - value of MUMPS RINFOG(icntl) 3244 3245 Level: beginner 3246 3247 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3248 @*/ 3249 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3250 { 3251 PetscFunctionBegin; 3252 PetscValidType(F, 1); 3253 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3254 PetscAssertPointer(val, 3); 3255 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3256 PetscFunctionReturn(PETSC_SUCCESS); 3257 } 3258 3259 /*@ 3260 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc> 3261 3262 Logically Collective 3263 3264 Input Parameter: 3265 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3266 3267 Output Parameters: 3268 + size - local size of the array. 
The size of the array is non-zero only on MPI rank 0 3269 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3270 for freeing this array. 3271 3272 Level: beginner 3273 3274 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3275 @*/ 3276 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3277 { 3278 PetscFunctionBegin; 3279 PetscValidType(F, 1); 3280 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3281 PetscAssertPointer(size, 2); 3282 PetscAssertPointer(array, 3); 3283 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3284 PetscFunctionReturn(PETSC_SUCCESS); 3285 } 3286 3287 /*MC 3288 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3289 MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc> 3290 3291 Works with `MATAIJ` and `MATSBAIJ` matrices 3292 3293 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3294 3295 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3296 See details below. 3297 3298 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3299 3300 Options Database Keys: 3301 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3302 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 3303 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3304 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3305 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3306 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3307 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3308 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3309 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3310 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3311 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3312 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3313 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3314 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3315 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3316 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3317 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3318 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3319 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3320 . 
-mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3321 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3322 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 3323 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3324 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3325 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3326 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3327 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3328 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3329 . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB) 3330 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3331 . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism 3332 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3333 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3334 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3335 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3336 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3337 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3338 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3339 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3340 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3341 3342 Level: beginner 3343 3344 Notes: 3345 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will 3346 error if the matrix is Hermitian. 3347 3348 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3349 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3350 3351 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3352 the failure with 3353 .vb 3354 KSPGetPC(ksp,&pc); 3355 PCFactorGetMatrix(pc,&mat); 3356 MatMumpsGetInfo(mat,....); 3357 MatMumpsGetInfog(mat,....); etc. 3358 .ve 3359 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3360 3361 MUMPS provides 64-bit integer support in two build modes: 3362 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3363 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3364 3365 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3366 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3367 columns. 
This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

   Two modes to run MUMPS/PETSc with OpenMP
.vb
   Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
   threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
.ve

.vb
   `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
   if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
.ve

   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
   (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
   (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
   libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).

   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
   processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
   are called slave ranks (or slaves). Only the master ranks are visible to MUMPS; the slave ranks are not. We free the CPUs assigned to the slaves (which might be set
   by CPU binding policies in job scripts) and make those CPUs available to the master so that the OpenMP threads spawned by MUMPS can run on them.
   On a multi-socket compute node, MPI rank mapping becomes an issue. Continuing the example above, suppose your compute node has two sockets:
   if you interleave MPI ranks over the two sockets (in other words, even ranks are placed on socket 0 and odd ranks on socket 1) and bind
   MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the
   cores in socket 1, which hurts data locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the
   problem does not occur. Therefore, when you use `-mat_mumps_use_omp_threads`, keep an eye on your MPI rank mapping and CPU binding.
   For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and
   examine the mapping result.

   PETSc does not control thread binding in MUMPS. To get the best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
   for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread.
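   As an illustrative sketch only, combining the settings already mentioned above for the two-node, 32-cores-per-node example (adapt the scheduler flags to your system), a Slurm job script might contain
.vb
   export OMP_PROC_BIND=spread
   export OMP_PLACES=threads
   srun --cpu-bind=verbose -m block:block -n 64 ./test -mat_mumps_use_omp_threads 16
.ve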
One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3404 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3405 3406 See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 3407 3408 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3409 M*/ 3410 3411 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 3412 { 3413 PetscFunctionBegin; 3414 *type = MATSOLVERMUMPS; 3415 PetscFunctionReturn(PETSC_SUCCESS); 3416 } 3417 3418 /* MatGetFactor for Seq and MPI AIJ matrices */ 3419 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3420 { 3421 Mat B; 3422 Mat_MUMPS *mumps; 3423 PetscBool isSeqAIJ, isDiag, isDense; 3424 PetscMPIInt size; 3425 3426 PetscFunctionBegin; 3427 #if defined(PETSC_USE_COMPLEX) 3428 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3429 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3430 *F = NULL; 3431 PetscFunctionReturn(PETSC_SUCCESS); 3432 } 3433 #endif 3434 /* Create the factorization matrix */ 3435 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3436 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3437 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3438 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3439 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3440 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3441 PetscCall(MatSetUp(B)); 3442 3443 PetscCall(PetscNew(&mumps)); 3444 3445 B->ops->view = MatView_MUMPS; 3446 B->ops->getinfo = MatGetInfo_MUMPS; 3447 3448 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3449 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3450 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3451 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3452 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3453 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3454 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3455 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3456 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3457 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3458 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3459 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3460 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3461 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3462 3463 if (ftype == MAT_FACTOR_LU) { 3464 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3465 B->factortype = MAT_FACTOR_LU; 3466 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3467 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3468 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3469 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3470 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3471 mumps->sym = 0; 3472 } else { 3473 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3474 B->factortype = MAT_FACTOR_CHOLESKY; 3475 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3476 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3477 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3478 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3479 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3480 #if defined(PETSC_USE_COMPLEX) 3481 mumps->sym = 2; 3482 #else 3483 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3484 else mumps->sym = 2; 3485 #endif 3486 } 3487 3488 /* set solvertype */ 3489 PetscCall(PetscFree(B->solvertype)); 3490 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3491 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3492 if (size == 1) { 3493 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3494 B->canuseordering = PETSC_TRUE; 3495 } 3496 B->ops->destroy = MatDestroy_MUMPS; 3497 B->data = (void *)mumps; 3498 3499 *F = B; 3500 mumps->id.job = JOB_NULL; 3501 mumps->ICNTL_pre = NULL; 3502 mumps->CNTL_pre = NULL; 3503 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3504 PetscFunctionReturn(PETSC_SUCCESS); 3505 } 3506 3507 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3508 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 3509 { 3510 Mat B; 3511 Mat_MUMPS *mumps; 3512 PetscBool isSeqSBAIJ; 3513 PetscMPIInt size; 3514 3515 PetscFunctionBegin; 3516 #if defined(PETSC_USE_COMPLEX) 3517 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3518 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3519 *F = NULL; 3520 PetscFunctionReturn(PETSC_SUCCESS); 3521 } 3522 #endif 3523 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3524 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3525 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3526 PetscCall(MatSetUp(B)); 3527 3528 PetscCall(PetscNew(&mumps)); 3529 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3530 if (isSeqSBAIJ) { 3531 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3532 } else { 3533 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3534 } 3535 3536 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3537 B->ops->view = MatView_MUMPS; 3538 B->ops->getinfo = MatGetInfo_MUMPS; 3539 3540 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3541 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3542 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3543 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3544 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3545 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3546 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3547 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3548 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3549 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3550 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3551 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3552 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3553 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3554 3555 B->factortype = MAT_FACTOR_CHOLESKY; 3556 #if defined(PETSC_USE_COMPLEX) 3557 mumps->sym = 2; 3558 #else 3559 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3560 else mumps->sym = 2; 3561 #endif 3562 3563 /* set solvertype */ 3564 PetscCall(PetscFree(B->solvertype)); 3565 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3566 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3567 if (size == 1) { 3568 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3569 B->canuseordering = PETSC_TRUE; 3570 } 3571 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3572 B->ops->destroy = MatDestroy_MUMPS; 3573 B->data = (void *)mumps; 3574 3575 *F = B; 3576 mumps->id.job = JOB_NULL; 3577 mumps->ICNTL_pre = NULL; 3578 mumps->CNTL_pre = NULL; 3579 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3580 PetscFunctionReturn(PETSC_SUCCESS); 3581 } 3582 3583 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3584 { 3585 Mat B; 3586 Mat_MUMPS *mumps; 3587 PetscBool isSeqBAIJ; 3588 PetscMPIInt 
  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetFactor for Seq and MPI SELL matrices */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqSELL;
  PetscMPIInt size;
  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
    mumps->sym = 0;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetFactor for MATNEST matrices */
static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B, **mats;
  Mat_MUMPS  *mumps;
  PetscInt    nr, nc;
  PetscMPIInt size;
  PetscBool   flg = PETSC_TRUE;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif

  /* Return if some condition is not satisfied */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;

      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
            PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
          PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
        PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym               = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
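/*
  Illustrative usage sketch (documentation only, kept inside #if 0 so it is never compiled):
  how the MUMPS factorizations registered by MatSolverTypeRegister_MUMPS() above are typically
  obtained through the generic MatGetFactor() interface. The matrix A and vectors b, x are
  assumed to be created and assembled by the caller; the helper name SolveWithMUMPS and the
  ICNTL(14) value used below are illustrative choices, not part of this file.
*/
#if 0
static PetscErrorCode SolveWithMUMPS(Mat A, Vec b, Vec x)
{
  Mat           F;
  IS            rowperm, colperm;
  MatFactorInfo info;

  PetscFunctionBegin;
  PetscCall(MatFactorInfoInitialize(&info));
  /* Dispatches to MatGetFactor_aij_mumps() for AIJ input, per the registrations above */
  PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
  /* Optionally tune MUMPS, e.g. ICNTL(14): percentage increase of the estimated working space */
  PetscCall(MatMumpsSetIcntl(F, 14, 30));
  PetscCall(MatGetOrdering(A, MATORDERINGNATURAL, &rowperm, &colperm));
  PetscCall(MatLUFactorSymbolic(F, A, rowperm, colperm, &info));
  PetscCall(MatLUFactorNumeric(F, A, &info));
  PetscCall(MatSolve(F, b, x));
  PetscCall(ISDestroy(&rowperm));
  PetscCall(ISDestroy(&colperm));
  PetscCall(MatDestroy(&F));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif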