1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <petscsf.h> 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 9 #include <../src/mat/impls/sell/mpi/mpisell.h> 10 11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #if defined(PETSC_USE_REAL_SINGLE) 16 #include <cmumps_c.h> 17 #else 18 #include <zmumps_c.h> 19 #endif 20 #else 21 #if defined(PETSC_USE_REAL_SINGLE) 22 #include <smumps_c.h> 23 #else 24 #include <dmumps_c.h> 25 #endif 26 #endif 27 EXTERN_C_END 28 #define JOB_INIT -1 29 #define JOB_NULL 0 30 #define JOB_FACTSYMBOLIC 1 31 #define JOB_FACTNUMERIC 2 32 #define JOB_SOLVE 3 33 #define JOB_END -2 34 35 /* calls to MUMPS */ 36 #if defined(PETSC_USE_COMPLEX) 37 #if defined(PETSC_USE_REAL_SINGLE) 38 #define MUMPS_c cmumps_c 39 #else 40 #define MUMPS_c zmumps_c 41 #endif 42 #else 43 #if defined(PETSC_USE_REAL_SINGLE) 44 #define MUMPS_c smumps_c 45 #else 46 #define MUMPS_c dmumps_c 47 #endif 48 #endif 49 50 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 51 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 52 naming convention in PetscMPIInt, PetscBLASInt etc. 53 */ 54 typedef MUMPS_INT PetscMUMPSInt; 55 56 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 57 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 58 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 59 #endif 60 #else 61 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 62 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 63 #endif 64 #endif 65 66 #define MPIU_MUMPSINT MPI_INT 67 #define PETSC_MUMPS_INT_MAX 2147483647 68 #define PETSC_MUMPS_INT_MIN -2147483648 69 70 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 71 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 72 { 73 PetscFunctionBegin; 74 #if PetscDefined(USE_64BIT_INDICES) 75 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 76 #endif 77 *b = (PetscMUMPSInt)(a); 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 /* Put these utility routines here since they are only used in this file */ 82 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 83 { 84 PetscInt myval; 85 PetscBool myset; 86 PetscFunctionBegin; 87 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 88 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 89 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 90 if (set) *set = myset; 91 PetscFunctionReturn(PETSC_SUCCESS); 92 } 93 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 94 95 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 96 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 97 #define PetscMUMPS_c(mumps) \ 98 do { \ 99 if (mumps->use_petsc_omp_support) { \ 100 if (mumps->is_omp_master) { \ 101 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 102 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 103 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 104 PetscCall(PetscFPTrapPop()); \ 105 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 106 } \ 107 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 108 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 109 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 110 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 111 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 112 */ \ 113 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 114 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 115 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 116 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 117 } else { \ 118 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 119 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 120 PetscCall(PetscFPTrapPop()); \ 121 } \ 122 } while (0) 123 #else 124 #define PetscMUMPS_c(mumps) \ 125 do { \ 126 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 127 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 128 PetscCall(PetscFPTrapPop()); \ 129 } while (0) 130 #endif 131 132 /* declare MumpsScalar */ 133 #if defined(PETSC_USE_COMPLEX) 134 #if defined(PETSC_USE_REAL_SINGLE) 135 #define MumpsScalar mumps_complex 136 #else 137 #define MumpsScalar mumps_double_complex 138 #endif 139 #else 140 #define MumpsScalar PetscScalar 141 #endif 142 143 /* macros s.t. indices match MUMPS documentation */ 144 #define ICNTL(I) icntl[(I)-1] 145 #define CNTL(I) cntl[(I)-1] 146 #define INFOG(I) infog[(I)-1] 147 #define INFO(I) info[(I)-1] 148 #define RINFOG(I) rinfog[(I)-1] 149 #define RINFO(I) rinfo[(I)-1] 150 151 typedef struct Mat_MUMPS Mat_MUMPS; 152 struct Mat_MUMPS { 153 #if defined(PETSC_USE_COMPLEX) 154 #if defined(PETSC_USE_REAL_SINGLE) 155 CMUMPS_STRUC_C id; 156 #else 157 ZMUMPS_STRUC_C id; 158 #endif 159 #else 160 #if defined(PETSC_USE_REAL_SINGLE) 161 SMUMPS_STRUC_C id; 162 #else 163 DMUMPS_STRUC_C id; 164 #endif 165 #endif 166 167 MatStructure matstruc; 168 PetscMPIInt myid, petsc_size; 169 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 170 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 171 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 172 PetscMUMPSInt sym; 173 MPI_Comm mumps_comm; 174 PetscMUMPSInt *ICNTL_pre; 175 PetscReal *CNTL_pre; 176 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 177 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 178 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 179 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 180 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 181 PetscInt *rhs_nrow, max_nrhs; 182 PetscMPIInt *rhs_recvcounts, *rhs_disps; 183 PetscScalar *rhs_loc, *rhs_recvbuf; 184 #endif 185 Vec b_seq, x_seq; 186 PetscInt ninfo, *info; /* which INFO to display */ 187 PetscInt sizeredrhs; 188 PetscScalar *schur_sol; 189 PetscInt schur_sizesol; 190 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 191 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 192 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 193 194 /* Support for MATNEST */ 195 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 196 PetscInt64 *nest_vals_start; 197 PetscScalar *nest_vals; 198 199 /* stuff used by petsc/mumps OpenMP support*/ 200 PetscBool use_petsc_omp_support; 201 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 202 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 203 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 204 PetscMPIInt tag, omp_comm_size; 205 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 206 MPI_Request *reqs; 207 }; 208 209 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 210 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 211 */ 212 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 213 { 214 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 215 216 PetscFunctionBegin; 217 #if defined(PETSC_USE_64BIT_INDICES) 218 { 219 PetscInt i; 220 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 221 PetscCall(PetscFree(mumps->ia_alloc)); 222 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 223 mumps->cur_ilen = nrow + 1; 224 } 225 if (nnz > mumps->cur_jlen) { 226 PetscCall(PetscFree(mumps->ja_alloc)); 227 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 228 mumps->cur_jlen = nnz; 229 } 230 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 231 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 232 *ia_mumps = mumps->ia_alloc; 233 *ja_mumps = mumps->ja_alloc; 234 } 235 #else 236 *ia_mumps = ia; 237 *ja_mumps = ja; 238 #endif 239 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 240 PetscFunctionReturn(PETSC_SUCCESS); 241 } 242 243 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 244 { 245 PetscFunctionBegin; 246 PetscCall(PetscFree(mumps->id.listvar_schur)); 247 PetscCall(PetscFree(mumps->id.redrhs)); 248 PetscCall(PetscFree(mumps->schur_sol)); 249 mumps->id.size_schur = 0; 250 mumps->id.schur_lld = 0; 251 mumps->id.ICNTL(19) = 0; 252 PetscFunctionReturn(PETSC_SUCCESS); 253 } 254 255 /* solve with rhs in mumps->id.redrhs and return in the same location */ 256 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 257 { 258 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 259 Mat S, B, X; 260 MatFactorSchurStatus schurstatus; 261 PetscInt sizesol; 262 263 PetscFunctionBegin; 264 PetscCall(MatFactorFactorizeSchurComplement(F)); 265 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 266 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 267 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 268 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 269 PetscCall(MatBindToCPU(B, S->boundtocpu)); 270 #endif 271 switch (schurstatus) { 272 case MAT_FACTOR_SCHUR_FACTORED: 273 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 274 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 275 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 276 PetscCall(MatBindToCPU(X, S->boundtocpu)); 277 #endif 278 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 279 PetscCall(MatMatSolveTranspose(S, B, X)); 280 } else { 281 PetscCall(MatMatSolve(S, B, X)); 282 } 283 break; 284 case MAT_FACTOR_SCHUR_INVERTED: 285 sizesol = mumps->id.nrhs * mumps->id.size_schur; 286 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 287 PetscCall(PetscFree(mumps->schur_sol)); 288 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 289 mumps->schur_sizesol = sizesol; 290 } 291 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 292 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 293 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 294 PetscCall(MatBindToCPU(X, S->boundtocpu)); 295 #endif 296 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 297 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 298 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 299 } else { 300 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 301 } 302 PetscCall(MatProductSetFromOptions(X)); 303 PetscCall(MatProductSymbolic(X)); 304 PetscCall(MatProductNumeric(X)); 305 306 PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 307 break; 308 default: 309 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 310 } 311 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 312 PetscCall(MatDestroy(&B)); 313 PetscCall(MatDestroy(&X)); 314 PetscFunctionReturn(PETSC_SUCCESS); 315 } 316 317 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 318 { 319 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 320 321 PetscFunctionBegin; 322 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 if (!expansion) { /* prepare for the condensation step */ 326 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 327 /* allocate MUMPS internal array to store reduced right-hand sides */ 328 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 329 PetscCall(PetscFree(mumps->id.redrhs)); 330 mumps->id.lredrhs = mumps->id.size_schur; 331 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 332 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 333 } 334 } else { /* prepare for the expansion step */ 335 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 336 PetscCall(MatMumpsSolveSchur_Private(F)); 337 mumps->id.ICNTL(26) = 2; /* expansion phase */ 338 PetscMUMPS_c(mumps); 339 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 340 /* restore defaults */ 341 mumps->id.ICNTL(26) = -1; 342 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 343 if (mumps->id.nrhs > 1) { 344 PetscCall(PetscFree(mumps->id.redrhs)); 345 mumps->id.lredrhs = 0; 346 mumps->sizeredrhs = 0; 347 } 348 } 349 PetscFunctionReturn(PETSC_SUCCESS); 350 } 351 352 /* 353 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 354 355 input: 356 A - matrix in aij,baij or sbaij format 357 shift - 0: C style output triple; 1: Fortran style output triple. 358 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 359 MAT_REUSE_MATRIX: only the values in v array are updated 360 output: 361 nnz - dim of r, c, and v (number of local nonzero entries of A) 362 r, c, v - row and col index, matrix values (matrix triples) 363 364 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 365 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 366 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 367 368 */ 369 370 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 371 { 372 const PetscScalar *av; 373 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 374 PetscInt64 nz, rnz, i, j, k; 375 PetscMUMPSInt *row, *col; 376 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 377 378 PetscFunctionBegin; 379 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 380 mumps->val = (PetscScalar *)av; 381 if (reuse == MAT_INITIAL_MATRIX) { 382 nz = aa->nz; 383 ai = aa->i; 384 aj = aa->j; 385 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 386 for (i = k = 0; i < M; i++) { 387 rnz = ai[i + 1] - ai[i]; 388 ajj = aj + ai[i]; 389 for (j = 0; j < rnz; j++) { 390 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 391 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 392 k++; 393 } 394 } 395 mumps->irn = row; 396 mumps->jcn = col; 397 mumps->nnz = nz; 398 } 399 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 404 { 405 PetscInt64 nz, i, j, k, r; 406 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 407 PetscMUMPSInt *row, *col; 408 409 PetscFunctionBegin; 410 mumps->val = a->val; 411 if (reuse == MAT_INITIAL_MATRIX) { 412 nz = a->sliidx[a->totalslices]; 413 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 414 for (i = k = 0; i < a->totalslices; i++) { 415 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 416 } 417 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 418 mumps->irn = row; 419 mumps->jcn = col; 420 mumps->nnz = nz; 421 } 422 PetscFunctionReturn(PETSC_SUCCESS); 423 } 424 425 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 426 { 427 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 428 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 429 PetscInt64 M, nz, idx = 0, rnz, i, j, k, m; 430 PetscInt bs; 431 PetscMUMPSInt *row, *col; 432 433 PetscFunctionBegin; 434 PetscCall(MatGetBlockSize(A, &bs)); 435 M = A->rmap->N / bs; 436 mumps->val = aa->a; 437 if (reuse == MAT_INITIAL_MATRIX) { 438 ai = aa->i; 439 aj = aa->j; 440 nz = bs2 * aa->nz; 441 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 442 for (i = 0; i < M; i++) { 443 ajj = aj + ai[i]; 444 rnz = ai[i + 1] - ai[i]; 445 for (k = 0; k < rnz; k++) { 446 for (j = 0; j < bs; j++) { 447 for (m = 0; m < bs; m++) { 448 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 449 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 450 idx++; 451 } 452 } 453 } 454 } 455 mumps->irn = row; 456 mumps->jcn = col; 457 mumps->nnz = nz; 458 } 459 PetscFunctionReturn(PETSC_SUCCESS); 460 } 461 462 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 463 { 464 const PetscInt *ai, *aj, *ajj; 465 PetscInt bs; 466 PetscInt64 nz, rnz, i, j, k, m; 467 PetscMUMPSInt *row, *col; 468 PetscScalar *val; 469 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 470 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 471 #if defined(PETSC_USE_COMPLEX) 472 PetscBool isset, hermitian; 473 #endif 474 475 PetscFunctionBegin; 476 #if defined(PETSC_USE_COMPLEX) 477 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 478 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 479 #endif 480 ai = aa->i; 481 aj = aa->j; 482 PetscCall(MatGetBlockSize(A, &bs)); 483 if (reuse == MAT_INITIAL_MATRIX) { 484 const PetscInt64 alloc_size = aa->nz * bs2; 485 486 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 487 if (bs > 1) { 488 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 489 mumps->val = mumps->val_alloc; 490 } else { 491 mumps->val = aa->a; 492 } 493 mumps->irn = row; 494 mumps->jcn = col; 495 } else { 496 if (bs == 1) mumps->val = aa->a; 497 row = mumps->irn; 498 col = mumps->jcn; 499 } 500 val = mumps->val; 501 502 nz = 0; 503 if (bs > 1) { 504 for (i = 0; i < mbs; i++) { 505 rnz = ai[i + 1] - ai[i]; 506 ajj = aj + ai[i]; 507 for (j = 0; j < rnz; j++) { 508 for (k = 0; k < bs; k++) { 509 for (m = 0; m < bs; m++) { 510 if (ajj[j] > i || k >= m) { 511 if (reuse == MAT_INITIAL_MATRIX) { 512 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 513 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 514 } 515 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 516 } 517 } 518 } 519 } 520 } 521 } else if (reuse == MAT_INITIAL_MATRIX) { 522 for (i = 0; i < mbs; i++) { 523 rnz = ai[i + 1] - ai[i]; 524 ajj = aj + ai[i]; 525 for (j = 0; j < rnz; j++) { 526 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 527 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 528 nz++; 529 } 530 } 531 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 532 } 533 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 534 PetscFunctionReturn(PETSC_SUCCESS); 535 } 536 537 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 538 { 539 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 540 PetscInt64 nz, rnz, i, j; 541 const PetscScalar *av, *v1; 542 PetscScalar *val; 543 PetscMUMPSInt *row, *col; 544 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 545 PetscBool missing; 546 #if defined(PETSC_USE_COMPLEX) 547 PetscBool hermitian, isset; 548 #endif 549 550 PetscFunctionBegin; 551 #if defined(PETSC_USE_COMPLEX) 552 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 553 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 554 #endif 555 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 556 ai = aa->i; 557 aj = aa->j; 558 adiag = aa->diag; 559 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 560 if (reuse == MAT_INITIAL_MATRIX) { 561 /* count nz in the upper triangular part of A */ 562 nz = 0; 563 if (missing) { 564 for (i = 0; i < M; i++) { 565 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 566 for (j = ai[i]; j < ai[i + 1]; j++) { 567 if (aj[j] < i) continue; 568 nz++; 569 } 570 } else { 571 nz += ai[i + 1] - adiag[i]; 572 } 573 } 574 } else { 575 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 576 } 577 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 578 PetscCall(PetscMalloc1(nz, &val)); 579 mumps->nnz = nz; 580 mumps->irn = row; 581 mumps->jcn = col; 582 mumps->val = mumps->val_alloc = val; 583 584 nz = 0; 585 if (missing) { 586 for (i = 0; i < M; i++) { 587 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 588 for (j = ai[i]; j < ai[i + 1]; j++) { 589 if (aj[j] < i) continue; 590 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 591 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 592 val[nz] = av[j]; 593 nz++; 594 } 595 } else { 596 rnz = ai[i + 1] - adiag[i]; 597 ajj = aj + adiag[i]; 598 v1 = av + adiag[i]; 599 for (j = 0; j < rnz; j++) { 600 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 601 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 602 val[nz++] = v1[j]; 603 } 604 } 605 } 606 } else { 607 for (i = 0; i < M; i++) { 608 rnz = ai[i + 1] - adiag[i]; 609 ajj = aj + adiag[i]; 610 v1 = av + adiag[i]; 611 for (j = 0; j < rnz; j++) { 612 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 613 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 614 val[nz++] = v1[j]; 615 } 616 } 617 } 618 } else { 619 nz = 0; 620 val = mumps->val; 621 if (missing) { 622 for (i = 0; i < M; i++) { 623 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 624 for (j = ai[i]; j < ai[i + 1]; j++) { 625 if (aj[j] < i) continue; 626 val[nz++] = av[j]; 627 } 628 } else { 629 rnz = ai[i + 1] - adiag[i]; 630 v1 = av + adiag[i]; 631 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 632 } 633 } 634 } else { 635 for (i = 0; i < M; i++) { 636 rnz = ai[i + 1] - adiag[i]; 637 v1 = av + adiag[i]; 638 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 639 } 640 } 641 } 642 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 643 PetscFunctionReturn(PETSC_SUCCESS); 644 } 645 646 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 647 { 648 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 649 PetscInt bs; 650 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 651 PetscMUMPSInt *row, *col; 652 const PetscScalar *av, *bv, *v1, *v2; 653 PetscScalar *val; 654 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 655 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 656 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 657 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 658 #if defined(PETSC_USE_COMPLEX) 659 PetscBool hermitian, isset; 660 #endif 661 662 PetscFunctionBegin; 663 #if defined(PETSC_USE_COMPLEX) 664 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 665 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 666 #endif 667 PetscCall(MatGetBlockSize(A, &bs)); 668 rstart = A->rmap->rstart; 669 ai = aa->i; 670 aj = aa->j; 671 bi = bb->i; 672 bj = bb->j; 673 av = aa->a; 674 bv = bb->a; 675 676 garray = mat->garray; 677 678 if (reuse == MAT_INITIAL_MATRIX) { 679 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 680 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 681 PetscCall(PetscMalloc1(nz, &val)); 682 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 683 mumps->irn = row; 684 mumps->jcn = col; 685 mumps->val = mumps->val_alloc = val; 686 } else { 687 val = mumps->val; 688 } 689 690 jj = 0; 691 irow = rstart; 692 for (i = 0; i < mbs; i++) { 693 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 694 countA = ai[i + 1] - ai[i]; 695 countB = bi[i + 1] - bi[i]; 696 bjj = bj + bi[i]; 697 v1 = av + ai[i] * bs2; 698 v2 = bv + bi[i] * bs2; 699 700 if (bs > 1) { 701 /* A-part */ 702 for (j = 0; j < countA; j++) { 703 for (k = 0; k < bs; k++) { 704 for (m = 0; m < bs; m++) { 705 if (rstart + ajj[j] * bs > irow || k >= m) { 706 if (reuse == MAT_INITIAL_MATRIX) { 707 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 708 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 709 } 710 val[jj++] = v1[j * bs2 + m + k * bs]; 711 } 712 } 713 } 714 } 715 716 /* B-part */ 717 for (j = 0; j < countB; j++) { 718 for (k = 0; k < bs; k++) { 719 for (m = 0; m < bs; m++) { 720 if (reuse == MAT_INITIAL_MATRIX) { 721 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 722 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 723 } 724 val[jj++] = v2[j * bs2 + m + k * bs]; 725 } 726 } 727 } 728 } else { 729 /* A-part */ 730 for (j = 0; j < countA; j++) { 731 if (reuse == MAT_INITIAL_MATRIX) { 732 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 733 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 734 } 735 val[jj++] = v1[j]; 736 } 737 738 /* B-part */ 739 for (j = 0; j < countB; j++) { 740 if (reuse == MAT_INITIAL_MATRIX) { 741 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 742 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 743 } 744 val[jj++] = v2[j]; 745 } 746 } 747 irow += bs; 748 } 749 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 750 PetscFunctionReturn(PETSC_SUCCESS); 751 } 752 753 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 754 { 755 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 756 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 757 PetscMUMPSInt *row, *col; 758 const PetscScalar *av, *bv, *v1, *v2; 759 PetscScalar *val; 760 Mat Ad, Ao; 761 Mat_SeqAIJ *aa; 762 Mat_SeqAIJ *bb; 763 764 PetscFunctionBegin; 765 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 766 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 767 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 768 769 aa = (Mat_SeqAIJ *)(Ad)->data; 770 bb = (Mat_SeqAIJ *)(Ao)->data; 771 ai = aa->i; 772 aj = aa->j; 773 bi = bb->i; 774 bj = bb->j; 775 776 rstart = A->rmap->rstart; 777 cstart = A->cmap->rstart; 778 779 if (reuse == MAT_INITIAL_MATRIX) { 780 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 781 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 782 PetscCall(PetscMalloc1(nz, &val)); 783 mumps->nnz = nz; 784 mumps->irn = row; 785 mumps->jcn = col; 786 mumps->val = mumps->val_alloc = val; 787 } else { 788 val = mumps->val; 789 } 790 791 jj = 0; 792 irow = rstart; 793 for (i = 0; i < m; i++) { 794 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 795 countA = ai[i + 1] - ai[i]; 796 countB = bi[i + 1] - bi[i]; 797 bjj = bj + bi[i]; 798 v1 = av + ai[i]; 799 v2 = bv + bi[i]; 800 801 /* A-part */ 802 for (j = 0; j < countA; j++) { 803 if (reuse == MAT_INITIAL_MATRIX) { 804 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 805 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 806 } 807 val[jj++] = v1[j]; 808 } 809 810 /* B-part */ 811 for (j = 0; j < countB; j++) { 812 if (reuse == MAT_INITIAL_MATRIX) { 813 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 814 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 815 } 816 val[jj++] = v2[j]; 817 } 818 irow++; 819 } 820 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 821 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 822 PetscFunctionReturn(PETSC_SUCCESS); 823 } 824 825 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 826 { 827 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 828 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 829 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 830 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 831 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 832 const PetscInt bs2 = mat->bs2; 833 PetscInt bs; 834 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 835 PetscMUMPSInt *row, *col; 836 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 837 PetscScalar *val; 838 839 PetscFunctionBegin; 840 PetscCall(MatGetBlockSize(A, &bs)); 841 if (reuse == MAT_INITIAL_MATRIX) { 842 nz = bs2 * (aa->nz + bb->nz); 843 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 844 PetscCall(PetscMalloc1(nz, &val)); 845 mumps->nnz = nz; 846 mumps->irn = row; 847 mumps->jcn = col; 848 mumps->val = mumps->val_alloc = val; 849 } else { 850 val = mumps->val; 851 } 852 853 jj = 0; 854 irow = rstart; 855 for (i = 0; i < mbs; i++) { 856 countA = ai[i + 1] - ai[i]; 857 countB = bi[i + 1] - bi[i]; 858 ajj = aj + ai[i]; 859 bjj = bj + bi[i]; 860 v1 = av + bs2 * ai[i]; 861 v2 = bv + bs2 * bi[i]; 862 863 idx = 0; 864 /* A-part */ 865 for (k = 0; k < countA; k++) { 866 for (j = 0; j < bs; j++) { 867 for (n = 0; n < bs; n++) { 868 if (reuse == MAT_INITIAL_MATRIX) { 869 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 870 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 871 } 872 val[jj++] = v1[idx++]; 873 } 874 } 875 } 876 877 idx = 0; 878 /* B-part */ 879 for (k = 0; k < countB; k++) { 880 for (j = 0; j < bs; j++) { 881 for (n = 0; n < bs; n++) { 882 if (reuse == MAT_INITIAL_MATRIX) { 883 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 884 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 885 } 886 val[jj++] = v2[idx++]; 887 } 888 } 889 } 890 irow += bs; 891 } 892 PetscFunctionReturn(PETSC_SUCCESS); 893 } 894 895 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 896 { 897 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 898 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 899 PetscMUMPSInt *row, *col; 900 const PetscScalar *av, *bv, *v1, *v2; 901 PetscScalar *val; 902 Mat Ad, Ao; 903 Mat_SeqAIJ *aa; 904 Mat_SeqAIJ *bb; 905 #if defined(PETSC_USE_COMPLEX) 906 PetscBool hermitian, isset; 907 #endif 908 909 PetscFunctionBegin; 910 #if defined(PETSC_USE_COMPLEX) 911 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 912 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 913 #endif 914 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 915 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 916 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 917 918 aa = (Mat_SeqAIJ *)(Ad)->data; 919 bb = (Mat_SeqAIJ *)(Ao)->data; 920 ai = aa->i; 921 aj = aa->j; 922 adiag = aa->diag; 923 bi = bb->i; 924 bj = bb->j; 925 926 rstart = A->rmap->rstart; 927 928 if (reuse == MAT_INITIAL_MATRIX) { 929 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 930 nzb = 0; /* num of upper triangular entries in mat->B */ 931 for (i = 0; i < m; i++) { 932 nza += (ai[i + 1] - adiag[i]); 933 countB = bi[i + 1] - bi[i]; 934 bjj = bj + bi[i]; 935 for (j = 0; j < countB; j++) { 936 if (garray[bjj[j]] > rstart) nzb++; 937 } 938 } 939 940 nz = nza + nzb; /* total nz of upper triangular part of mat */ 941 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 942 PetscCall(PetscMalloc1(nz, &val)); 943 mumps->nnz = nz; 944 mumps->irn = row; 945 mumps->jcn = col; 946 mumps->val = mumps->val_alloc = val; 947 } else { 948 val = mumps->val; 949 } 950 951 jj = 0; 952 irow = rstart; 953 for (i = 0; i < m; i++) { 954 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 955 v1 = av + adiag[i]; 956 countA = ai[i + 1] - adiag[i]; 957 countB = bi[i + 1] - bi[i]; 958 bjj = bj + bi[i]; 959 v2 = bv + bi[i]; 960 961 /* A-part */ 962 for (j = 0; j < countA; j++) { 963 if (reuse == MAT_INITIAL_MATRIX) { 964 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 965 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 966 } 967 val[jj++] = v1[j]; 968 } 969 970 /* B-part */ 971 for (j = 0; j < countB; j++) { 972 if (garray[bjj[j]] > rstart) { 973 if (reuse == MAT_INITIAL_MATRIX) { 974 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 975 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 976 } 977 val[jj++] = v2[j]; 978 } 979 } 980 irow++; 981 } 982 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 983 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 984 PetscFunctionReturn(PETSC_SUCCESS); 985 } 986 987 PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 988 { 989 Mat **mats; 990 PetscInt nr, nc; 991 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 992 993 PetscFunctionBegin; 994 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 995 if (reuse == MAT_INITIAL_MATRIX) { 996 PetscMUMPSInt *irns, *jcns; 997 PetscScalar *vals; 998 PetscInt64 totnnz, cumnnz, maxnnz; 999 PetscInt *pjcns_w; 1000 IS *rows, *cols; 1001 PetscInt **rows_idx, **cols_idx; 1002 1003 cumnnz = 0; 1004 maxnnz = 0; 1005 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1006 for (PetscInt r = 0; r < nr; r++) { 1007 for (PetscInt c = 0; c < nc; c++) { 1008 Mat sub = mats[r][c]; 1009 1010 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1011 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1012 if (sub) { 1013 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1014 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE; 1015 MatInfo info; 1016 1017 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1018 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1019 else { 1020 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1021 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1022 } 1023 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1024 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1025 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1026 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1027 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1028 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1029 1030 if (chol) { 1031 if (r == c) { 1032 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1033 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1034 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1035 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1036 } else { 1037 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1038 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1039 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1040 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1041 } 1042 } else { 1043 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1044 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1045 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1046 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1047 } 1048 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1049 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1050 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1051 cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 1052 maxnnz = PetscMax(maxnnz, info.nz_used); 1053 } 1054 } 1055 } 1056 1057 /* Allocate total COO */ 1058 totnnz = cumnnz; 1059 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1060 PetscCall(PetscMalloc1(totnnz, &vals)); 1061 1062 /* Handle rows and column maps 1063 We directly map rows and use an SF for the columns */ 1064 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1065 PetscCall(MatNestGetISs(A, rows, cols)); 1066 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1067 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1068 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1069 else (void)maxnnz; 1070 1071 cumnnz = 0; 1072 for (PetscInt r = 0; r < nr; r++) { 1073 for (PetscInt c = 0; c < nc; c++) { 1074 Mat sub = mats[r][c]; 1075 const PetscInt *ridx = rows_idx[r]; 1076 const PetscInt *cidx = cols_idx[c]; 1077 PetscInt rst; 1078 PetscSF csf; 1079 PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 1080 PetscLayout cmap; 1081 1082 mumps->nest_vals_start[r * nc + c] = cumnnz; 1083 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1084 1085 /* Extract inner blocks if needed */ 1086 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1087 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1088 else { 1089 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1090 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1091 } 1092 swap = (PetscBool)(isTrans || isHTrans); 1093 1094 /* Get column layout to map off-process columns */ 1095 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1096 1097 /* Get row start to map on-process rows */ 1098 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1099 1100 /* Directly use the mumps datastructure and use C ordering for now */ 1101 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1102 1103 /* Swap the role of rows and columns indices for transposed blocks 1104 since we need values with global final ordering */ 1105 if (swap) { 1106 cidx = rows_idx[r]; 1107 ridx = cols_idx[c]; 1108 } 1109 1110 /* Communicate column indices 1111 This could have been done with a single SF but it would have complicated the code a lot. 1112 But since we do it only once, we pay the price of setting up an SF for each block */ 1113 if (PetscDefined(USE_64BIT_INDICES)) { 1114 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1115 } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 1116 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1117 PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1118 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1119 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1120 PetscCall(PetscSFDestroy(&csf)); 1121 1122 /* Import indices: use direct map for rows and mapped indices for columns */ 1123 if (swap) { 1124 for (PetscInt k = 0; k < mumps->nnz; k++) { 1125 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1126 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1127 } 1128 } else { 1129 for (PetscInt k = 0; k < mumps->nnz; k++) { 1130 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1131 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1132 } 1133 } 1134 1135 /* Import values to full COO */ 1136 if (isHTrans) { /* conjugate the entries */ 1137 for (PetscInt k = 0; k < mumps->nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1138 } 1139 PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1140 1141 /* Shift new starting point and sanity check */ 1142 cumnnz += mumps->nnz; 1143 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1144 1145 /* Free scratch memory */ 1146 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1147 PetscCall(PetscFree(mumps->val_alloc)); 1148 mumps->val = NULL; 1149 mumps->nnz = 0; 1150 } 1151 } 1152 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1153 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1154 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1155 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1156 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1157 mumps->nest_vals_start[nr * nc] = cumnnz; 1158 1159 /* Set pointers for final MUMPS data structure */ 1160 mumps->nest_vals = vals; 1161 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1162 mumps->val = vals; 1163 mumps->irn = irns; 1164 mumps->jcn = jcns; 1165 mumps->nnz = cumnnz; 1166 } else { 1167 PetscScalar *oval = mumps->nest_vals; 1168 for (PetscInt r = 0; r < nr; r++) { 1169 for (PetscInt c = 0; c < nc; c++) { 1170 PetscBool isTrans, isHTrans = PETSC_FALSE; 1171 Mat sub = mats[r][c]; 1172 PetscInt midx = r * nc + c; 1173 1174 if (!mumps->nest_convert_to_triples[midx]) continue; 1175 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1176 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1177 else { 1178 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1179 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1180 } 1181 mumps->val = oval + mumps->nest_vals_start[midx]; 1182 PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1183 if (isHTrans) { 1184 PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1185 for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1186 } 1187 } 1188 } 1189 mumps->val = oval; 1190 } 1191 PetscFunctionReturn(PETSC_SUCCESS); 1192 } 1193 1194 PetscErrorCode MatDestroy_MUMPS(Mat A) 1195 { 1196 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1197 1198 PetscFunctionBegin; 1199 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1200 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1201 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1202 PetscCall(VecDestroy(&mumps->b_seq)); 1203 PetscCall(VecDestroy(&mumps->x_seq)); 1204 PetscCall(PetscFree(mumps->id.perm_in)); 1205 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1206 PetscCall(PetscFree(mumps->val_alloc)); 1207 PetscCall(PetscFree(mumps->info)); 1208 PetscCall(PetscFree(mumps->ICNTL_pre)); 1209 PetscCall(PetscFree(mumps->CNTL_pre)); 1210 PetscCall(MatMumpsResetSchur_Private(mumps)); 1211 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1212 mumps->id.job = JOB_END; 1213 PetscMUMPS_c(mumps); 1214 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1215 if (mumps->mumps_comm != MPI_COMM_NULL) { 1216 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1217 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1218 } 1219 } 1220 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1221 if (mumps->use_petsc_omp_support) { 1222 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1223 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1224 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1225 } 1226 #endif 1227 PetscCall(PetscFree(mumps->ia_alloc)); 1228 PetscCall(PetscFree(mumps->ja_alloc)); 1229 PetscCall(PetscFree(mumps->recvcount)); 1230 PetscCall(PetscFree(mumps->reqs)); 1231 PetscCall(PetscFree(mumps->irhs_loc)); 1232 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1233 PetscCall(PetscFree(mumps->nest_vals)); 1234 PetscCall(PetscFree(A->data)); 1235 1236 /* clear composed functions */ 1237 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1238 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1239 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1240 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1241 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1242 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1243 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1244 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1245 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1246 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1247 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1248 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1249 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1250 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1251 PetscFunctionReturn(PETSC_SUCCESS); 1252 } 1253 1254 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1255 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1256 { 1257 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1258 const PetscMPIInt ompsize = mumps->omp_comm_size; 1259 PetscInt i, m, M, rstart; 1260 1261 PetscFunctionBegin; 1262 PetscCall(MatGetSize(A, &M, NULL)); 1263 PetscCall(MatGetLocalSize(A, &m, NULL)); 1264 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1265 if (ompsize == 1) { 1266 if (!mumps->irhs_loc) { 1267 mumps->nloc_rhs = m; 1268 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1269 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1270 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1271 } 1272 mumps->id.rhs_loc = (MumpsScalar *)array; 1273 } else { 1274 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1275 const PetscInt *ranges; 1276 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1277 MPI_Group petsc_group, omp_group; 1278 PetscScalar *recvbuf = NULL; 1279 1280 if (mumps->is_omp_master) { 1281 /* Lazily initialize the omp stuff for distributed rhs */ 1282 if (!mumps->irhs_loc) { 1283 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1284 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1285 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1286 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1287 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1288 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1289 1290 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1291 mumps->nloc_rhs = 0; 1292 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1293 for (j = 0; j < ompsize; j++) { 1294 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1295 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1296 } 1297 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1298 for (j = k = 0; j < ompsize; j++) { 1299 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1300 } 1301 1302 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1303 PetscCallMPI(MPI_Group_free(&petsc_group)); 1304 PetscCallMPI(MPI_Group_free(&omp_group)); 1305 } 1306 1307 /* Realloc buffers when current nrhs is bigger than what we have met */ 1308 if (nrhs > mumps->max_nrhs) { 1309 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1310 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1311 mumps->max_nrhs = nrhs; 1312 } 1313 1314 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1315 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1316 mumps->rhs_disps[0] = 0; 1317 for (j = 1; j < ompsize; j++) { 1318 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1319 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1320 } 1321 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1322 } 1323 1324 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1325 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1326 1327 if (mumps->is_omp_master) { 1328 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1329 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1330 for (j = 0; j < ompsize; j++) { 1331 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1332 dst = dstbase; 1333 for (i = 0; i < nrhs; i++) { 1334 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1335 src += mumps->rhs_nrow[j]; 1336 dst += mumps->nloc_rhs; 1337 } 1338 dstbase += mumps->rhs_nrow[j]; 1339 } 1340 } 1341 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1342 } 1343 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1344 } 1345 mumps->id.nrhs = nrhs; 1346 mumps->id.nloc_rhs = mumps->nloc_rhs; 1347 mumps->id.lrhs_loc = mumps->nloc_rhs; 1348 mumps->id.irhs_loc = mumps->irhs_loc; 1349 PetscFunctionReturn(PETSC_SUCCESS); 1350 } 1351 1352 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1353 { 1354 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1355 const PetscScalar *rarray = NULL; 1356 PetscScalar *array; 1357 IS is_iden, is_petsc; 1358 PetscInt i; 1359 PetscBool second_solve = PETSC_FALSE; 1360 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1361 1362 PetscFunctionBegin; 1363 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1364 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1365 &cite1)); 1366 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1367 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1368 &cite2)); 1369 1370 if (A->factorerrortype) { 1371 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1372 PetscCall(VecSetInf(x)); 1373 PetscFunctionReturn(PETSC_SUCCESS); 1374 } 1375 1376 mumps->id.nrhs = 1; 1377 if (mumps->petsc_size > 1) { 1378 if (mumps->ICNTL20 == 10) { 1379 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1380 PetscCall(VecGetArrayRead(b, &rarray)); 1381 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1382 } else { 1383 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1384 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1385 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1386 if (!mumps->myid) { 1387 PetscCall(VecGetArray(mumps->b_seq, &array)); 1388 mumps->id.rhs = (MumpsScalar *)array; 1389 } 1390 } 1391 } else { /* petsc_size == 1 */ 1392 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1393 PetscCall(VecCopy(b, x)); 1394 PetscCall(VecGetArray(x, &array)); 1395 mumps->id.rhs = (MumpsScalar *)array; 1396 } 1397 1398 /* 1399 handle condensation step of Schur complement (if any) 1400 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1401 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1402 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1403 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1404 */ 1405 if (mumps->id.size_schur > 0) { 1406 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1407 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1408 second_solve = PETSC_TRUE; 1409 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1410 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1411 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1412 } 1413 /* solve phase */ 1414 mumps->id.job = JOB_SOLVE; 1415 PetscMUMPS_c(mumps); 1416 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1417 1418 /* handle expansion step of Schur complement (if any) */ 1419 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1420 else if (mumps->id.ICNTL(26) == 1) { 1421 PetscCall(MatMumpsSolveSchur_Private(A)); 1422 for (i = 0; i < mumps->id.size_schur; ++i) { 1423 #if !defined(PETSC_USE_COMPLEX) 1424 PetscScalar val = mumps->id.redrhs[i]; 1425 #else 1426 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1427 #endif 1428 array[mumps->id.listvar_schur[i] - 1] = val; 1429 } 1430 } 1431 1432 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1433 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1434 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1435 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1436 } 1437 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1438 PetscInt *isol2_loc = NULL; 1439 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1440 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1441 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1442 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1443 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1444 PetscCall(ISDestroy(&is_iden)); 1445 PetscCall(ISDestroy(&is_petsc)); 1446 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1447 } 1448 1449 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1450 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1451 } 1452 1453 if (mumps->petsc_size > 1) { 1454 if (mumps->ICNTL20 == 10) { 1455 PetscCall(VecRestoreArrayRead(b, &rarray)); 1456 } else if (!mumps->myid) { 1457 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1458 } 1459 } else PetscCall(VecRestoreArray(x, &array)); 1460 1461 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1462 PetscFunctionReturn(PETSC_SUCCESS); 1463 } 1464 1465 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1466 { 1467 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1468 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1469 1470 PetscFunctionBegin; 1471 mumps->id.ICNTL(9) = 0; 1472 PetscCall(MatSolve_MUMPS(A, b, x)); 1473 mumps->id.ICNTL(9) = value; 1474 PetscFunctionReturn(PETSC_SUCCESS); 1475 } 1476 1477 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1478 { 1479 Mat Bt = NULL; 1480 PetscBool denseX, denseB, flg, flgT; 1481 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1482 PetscInt i, nrhs, M; 1483 PetscScalar *array; 1484 const PetscScalar *rbray; 1485 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1486 PetscMUMPSInt *isol_loc, *isol_loc_save; 1487 PetscScalar *bray, *sol_loc, *sol_loc_save; 1488 IS is_to, is_from; 1489 PetscInt k, proc, j, m, myrstart; 1490 const PetscInt *rstart; 1491 Vec v_mpi, msol_loc; 1492 VecScatter scat_sol; 1493 Vec b_seq; 1494 VecScatter scat_rhs; 1495 PetscScalar *aa; 1496 PetscInt spnr, *ia, *ja; 1497 Mat_MPIAIJ *b = NULL; 1498 1499 PetscFunctionBegin; 1500 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1501 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1502 1503 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1504 if (denseB) { 1505 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1506 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1507 } else { /* sparse B */ 1508 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1509 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1510 if (flgT) { /* input B is transpose of actual RHS matrix, 1511 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1512 PetscCall(MatTransposeGetMat(B, &Bt)); 1513 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1514 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1515 } 1516 1517 PetscCall(MatGetSize(B, &M, &nrhs)); 1518 mumps->id.nrhs = nrhs; 1519 mumps->id.lrhs = M; 1520 mumps->id.rhs = NULL; 1521 1522 if (mumps->petsc_size == 1) { 1523 PetscScalar *aa; 1524 PetscInt spnr, *ia, *ja; 1525 PetscBool second_solve = PETSC_FALSE; 1526 1527 PetscCall(MatDenseGetArray(X, &array)); 1528 mumps->id.rhs = (MumpsScalar *)array; 1529 1530 if (denseB) { 1531 /* copy B to X */ 1532 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1533 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1534 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1535 } else { /* sparse B */ 1536 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1537 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1538 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1539 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1540 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1541 } 1542 /* handle condensation step of Schur complement (if any) */ 1543 if (mumps->id.size_schur > 0) { 1544 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1545 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1546 second_solve = PETSC_TRUE; 1547 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1548 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1549 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1550 } 1551 /* solve phase */ 1552 mumps->id.job = JOB_SOLVE; 1553 PetscMUMPS_c(mumps); 1554 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1555 1556 /* handle expansion step of Schur complement (if any) */ 1557 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1558 else if (mumps->id.ICNTL(26) == 1) { 1559 PetscCall(MatMumpsSolveSchur_Private(A)); 1560 for (j = 0; j < nrhs; ++j) 1561 for (i = 0; i < mumps->id.size_schur; ++i) { 1562 #if !defined(PETSC_USE_COMPLEX) 1563 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1564 #else 1565 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1566 #endif 1567 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1568 } 1569 } 1570 if (!denseB) { /* sparse B */ 1571 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1572 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1573 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1574 } 1575 PetscCall(MatDenseRestoreArray(X, &array)); 1576 PetscFunctionReturn(PETSC_SUCCESS); 1577 } 1578 1579 /* parallel case: MUMPS requires rhs B to be centralized on the host! */ 1580 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1581 1582 /* create msol_loc to hold mumps local solution */ 1583 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1584 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1585 1586 lsol_loc = mumps->id.lsol_loc; 1587 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1588 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1589 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1590 mumps->id.isol_loc = isol_loc; 1591 1592 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1593 1594 if (denseB) { 1595 if (mumps->ICNTL20 == 10) { 1596 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1597 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1598 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1599 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1600 PetscCall(MatGetLocalSize(B, &m, NULL)); 1601 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1602 } else { 1603 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1604 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1605 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1606 0, re-arrange B into desired order, which is a local operation. 1607 */ 1608 1609 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1610 /* wrap dense rhs matrix B into a vector v_mpi */ 1611 PetscCall(MatGetLocalSize(B, &m, NULL)); 1612 PetscCall(MatDenseGetArray(B, &bray)); 1613 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1614 PetscCall(MatDenseRestoreArray(B, &bray)); 1615 1616 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1617 if (!mumps->myid) { 1618 PetscInt *idx; 1619 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1620 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1621 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1622 k = 0; 1623 for (proc = 0; proc < mumps->petsc_size; proc++) { 1624 for (j = 0; j < nrhs; j++) { 1625 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1626 } 1627 } 1628 1629 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1630 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1631 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1632 } else { 1633 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1634 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1635 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1636 } 1637 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1638 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1639 PetscCall(ISDestroy(&is_to)); 1640 PetscCall(ISDestroy(&is_from)); 1641 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1642 1643 if (!mumps->myid) { /* define rhs on the host */ 1644 PetscCall(VecGetArray(b_seq, &bray)); 1645 mumps->id.rhs = (MumpsScalar *)bray; 1646 PetscCall(VecRestoreArray(b_seq, &bray)); 1647 } 1648 } 1649 } else { /* sparse B */ 1650 b = (Mat_MPIAIJ *)Bt->data; 1651 1652 /* wrap dense X into a vector v_mpi */ 1653 PetscCall(MatGetLocalSize(X, &m, NULL)); 1654 PetscCall(MatDenseGetArray(X, &bray)); 1655 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1656 PetscCall(MatDenseRestoreArray(X, &bray)); 1657 1658 if (!mumps->myid) { 1659 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1660 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1661 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1662 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1663 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1664 } else { 1665 mumps->id.irhs_ptr = NULL; 1666 mumps->id.irhs_sparse = NULL; 1667 mumps->id.nz_rhs = 0; 1668 mumps->id.rhs_sparse = NULL; 1669 } 1670 } 1671 1672 /* solve phase */ 1673 mumps->id.job = JOB_SOLVE; 1674 PetscMUMPS_c(mumps); 1675 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1676 1677 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1678 PetscCall(MatDenseGetArray(X, &array)); 1679 PetscCall(VecPlaceArray(v_mpi, array)); 1680 1681 /* create scatter scat_sol */ 1682 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1683 /* iidx: index for scatter mumps solution to petsc X */ 1684 1685 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1686 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1687 for (i = 0; i < lsol_loc; i++) { 1688 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1689 1690 for (proc = 0; proc < mumps->petsc_size; proc++) { 1691 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1692 myrstart = rstart[proc]; 1693 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1694 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1695 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1696 break; 1697 } 1698 } 1699 1700 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1701 } 1702 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1703 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1704 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1705 PetscCall(ISDestroy(&is_from)); 1706 PetscCall(ISDestroy(&is_to)); 1707 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1708 PetscCall(MatDenseRestoreArray(X, &array)); 1709 1710 /* free spaces */ 1711 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1712 mumps->id.isol_loc = isol_loc_save; 1713 1714 PetscCall(PetscFree2(sol_loc, isol_loc)); 1715 PetscCall(PetscFree(idxx)); 1716 PetscCall(VecDestroy(&msol_loc)); 1717 PetscCall(VecDestroy(&v_mpi)); 1718 if (!denseB) { 1719 if (!mumps->myid) { 1720 b = (Mat_MPIAIJ *)Bt->data; 1721 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1722 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1723 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1724 } 1725 } else { 1726 if (mumps->ICNTL20 == 0) { 1727 PetscCall(VecDestroy(&b_seq)); 1728 PetscCall(VecScatterDestroy(&scat_rhs)); 1729 } 1730 } 1731 PetscCall(VecScatterDestroy(&scat_sol)); 1732 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1733 PetscFunctionReturn(PETSC_SUCCESS); 1734 } 1735 1736 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1737 { 1738 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1739 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1740 1741 PetscFunctionBegin; 1742 mumps->id.ICNTL(9) = 0; 1743 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1744 mumps->id.ICNTL(9) = value; 1745 PetscFunctionReturn(PETSC_SUCCESS); 1746 } 1747 1748 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1749 { 1750 PetscBool flg; 1751 Mat B; 1752 1753 PetscFunctionBegin; 1754 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1755 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1756 1757 /* Create B=Bt^T that uses Bt's data structure */ 1758 PetscCall(MatCreateTranspose(Bt, &B)); 1759 1760 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1761 PetscCall(MatDestroy(&B)); 1762 PetscFunctionReturn(PETSC_SUCCESS); 1763 } 1764 1765 #if !defined(PETSC_USE_COMPLEX) 1766 /* 1767 input: 1768 F: numeric factor 1769 output: 1770 nneg: total number of negative pivots 1771 nzero: total number of zero pivots 1772 npos: (global dimension of F) - nneg - nzero 1773 */ 1774 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1775 { 1776 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1777 PetscMPIInt size; 1778 1779 PetscFunctionBegin; 1780 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1781 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1782 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1783 1784 if (nneg) *nneg = mumps->id.INFOG(12); 1785 if (nzero || npos) { 1786 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1787 if (nzero) *nzero = mumps->id.INFOG(28); 1788 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1789 } 1790 PetscFunctionReturn(PETSC_SUCCESS); 1791 } 1792 #endif 1793 1794 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1795 { 1796 PetscInt i, nreqs; 1797 PetscMUMPSInt *irn, *jcn; 1798 PetscMPIInt count; 1799 PetscInt64 totnnz, remain; 1800 const PetscInt osize = mumps->omp_comm_size; 1801 PetscScalar *val; 1802 1803 PetscFunctionBegin; 1804 if (osize > 1) { 1805 if (reuse == MAT_INITIAL_MATRIX) { 1806 /* master first gathers counts of nonzeros to receive */ 1807 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1808 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1809 1810 /* Then each computes number of send/recvs */ 1811 if (mumps->is_omp_master) { 1812 /* Start from 1 since self communication is not done in MPI */ 1813 nreqs = 0; 1814 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1815 } else { 1816 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1817 } 1818 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1819 1820 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1821 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1822 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1823 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1824 */ 1825 nreqs = 0; /* counter for actual send/recvs */ 1826 if (mumps->is_omp_master) { 1827 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1828 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1829 PetscCall(PetscMalloc1(totnnz, &val)); 1830 1831 /* Self communication */ 1832 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1833 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1834 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1835 1836 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1837 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1838 PetscCall(PetscFree(mumps->val_alloc)); 1839 mumps->nnz = totnnz; 1840 mumps->irn = irn; 1841 mumps->jcn = jcn; 1842 mumps->val = mumps->val_alloc = val; 1843 1844 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1845 jcn += mumps->recvcount[0]; 1846 val += mumps->recvcount[0]; 1847 1848 /* Remote communication */ 1849 for (i = 1; i < osize; i++) { 1850 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1851 remain = mumps->recvcount[i] - count; 1852 while (count > 0) { 1853 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1854 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1855 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1856 irn += count; 1857 jcn += count; 1858 val += count; 1859 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1860 remain -= count; 1861 } 1862 } 1863 } else { 1864 irn = mumps->irn; 1865 jcn = mumps->jcn; 1866 val = mumps->val; 1867 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1868 remain = mumps->nnz - count; 1869 while (count > 0) { 1870 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1871 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1872 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1873 irn += count; 1874 jcn += count; 1875 val += count; 1876 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1877 remain -= count; 1878 } 1879 } 1880 } else { 1881 nreqs = 0; 1882 if (mumps->is_omp_master) { 1883 val = mumps->val + mumps->recvcount[0]; 1884 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1885 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1886 remain = mumps->recvcount[i] - count; 1887 while (count > 0) { 1888 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1889 val += count; 1890 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1891 remain -= count; 1892 } 1893 } 1894 } else { 1895 val = mumps->val; 1896 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1897 remain = mumps->nnz - count; 1898 while (count > 0) { 1899 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1900 val += count; 1901 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1902 remain -= count; 1903 } 1904 } 1905 } 1906 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1907 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1908 } 1909 PetscFunctionReturn(PETSC_SUCCESS); 1910 } 1911 1912 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info) 1913 { 1914 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1915 PetscBool isMPIAIJ; 1916 1917 PetscFunctionBegin; 1918 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1919 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1920 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1921 PetscFunctionReturn(PETSC_SUCCESS); 1922 } 1923 1924 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1925 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1926 1927 /* numerical factorization phase */ 1928 mumps->id.job = JOB_FACTNUMERIC; 1929 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1930 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 1931 } else { 1932 mumps->id.a_loc = (MumpsScalar *)mumps->val; 1933 } 1934 PetscMUMPS_c(mumps); 1935 if (mumps->id.INFOG(1) < 0) { 1936 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 1937 if (mumps->id.INFOG(1) == -10) { 1938 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1939 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1940 } else if (mumps->id.INFOG(1) == -13) { 1941 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1942 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1943 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1944 PetscCall(PetscInfo(F, "MUMPS error in numerical factorizatione: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1945 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1946 } else { 1947 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1948 F->factorerrortype = MAT_FACTOR_OTHER; 1949 } 1950 } 1951 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 1952 1953 F->assembled = PETSC_TRUE; 1954 1955 if (F->schur) { /* reset Schur status to unfactored */ 1956 #if defined(PETSC_HAVE_CUDA) 1957 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1958 #endif 1959 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1960 mumps->id.ICNTL(19) = 2; 1961 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1962 } 1963 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1964 } 1965 1966 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1967 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1968 1969 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1970 if (mumps->petsc_size > 1) { 1971 PetscInt lsol_loc; 1972 PetscScalar *sol_loc; 1973 1974 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 1975 1976 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1977 if (mumps->x_seq) { 1978 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1979 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1980 PetscCall(VecDestroy(&mumps->x_seq)); 1981 } 1982 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1983 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1984 mumps->id.lsol_loc = lsol_loc; 1985 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1986 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1987 } 1988 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1989 PetscFunctionReturn(PETSC_SUCCESS); 1990 } 1991 1992 /* Sets MUMPS options from the options database */ 1993 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1994 { 1995 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1996 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1997 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1998 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1999 MumpsScalar *arr; 2000 2001 PetscFunctionBegin; 2002 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2003 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2004 PetscInt nthreads = 0; 2005 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2006 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2007 2008 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2009 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2010 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2011 2012 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2013 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2014 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2015 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2016 if (mumps->use_petsc_omp_support) { 2017 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2018 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2019 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2020 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2021 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2022 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2023 #endif 2024 } else { 2025 mumps->omp_comm = PETSC_COMM_SELF; 2026 mumps->mumps_comm = mumps->petsc_comm; 2027 mumps->is_omp_master = PETSC_TRUE; 2028 } 2029 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2030 mumps->reqs = NULL; 2031 mumps->tag = 0; 2032 2033 if (mumps->mumps_comm != MPI_COMM_NULL) { 2034 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2035 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2036 MPI_Comm comm; 2037 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2038 mumps->mumps_comm = comm; 2039 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2040 } 2041 2042 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2043 mumps->id.job = JOB_INIT; 2044 mumps->id.par = 1; /* host participates factorizaton and solve */ 2045 mumps->id.sym = mumps->sym; 2046 2047 size = mumps->id.size_schur; 2048 arr = mumps->id.schur; 2049 listvar_schur = mumps->id.listvar_schur; 2050 PetscMUMPS_c(mumps); 2051 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2052 /* restore cached ICNTL and CNTL values */ 2053 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2054 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2055 PetscCall(PetscFree(mumps->ICNTL_pre)); 2056 PetscCall(PetscFree(mumps->CNTL_pre)); 2057 2058 if (schur) { 2059 mumps->id.size_schur = size; 2060 mumps->id.schur_lld = size; 2061 mumps->id.schur = arr; 2062 mumps->id.listvar_schur = listvar_schur; 2063 if (mumps->petsc_size > 1) { 2064 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2065 2066 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2067 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2068 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2069 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2070 } else { 2071 if (F->factortype == MAT_FACTOR_LU) { 2072 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2073 } else { 2074 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2075 } 2076 } 2077 mumps->id.ICNTL(26) = -1; 2078 } 2079 2080 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2081 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 2082 */ 2083 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2084 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2085 2086 mumps->scat_rhs = NULL; 2087 mumps->scat_sol = NULL; 2088 2089 /* set PETSc-MUMPS default options - override MUMPS default */ 2090 mumps->id.ICNTL(3) = 0; 2091 mumps->id.ICNTL(4) = 0; 2092 if (mumps->petsc_size == 1) { 2093 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2094 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2095 } else { 2096 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2097 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2098 } 2099 } 2100 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2101 if (flg) mumps->id.ICNTL(1) = icntl; 2102 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2103 if (flg) mumps->id.ICNTL(2) = icntl; 2104 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2105 if (flg) mumps->id.ICNTL(3) = icntl; 2106 2107 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2108 if (flg) mumps->id.ICNTL(4) = icntl; 2109 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2110 2111 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2112 if (flg) mumps->id.ICNTL(6) = icntl; 2113 2114 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2115 if (flg) { 2116 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2117 mumps->id.ICNTL(7) = icntl; 2118 } 2119 2120 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2121 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2122 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2123 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2124 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2125 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2126 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2127 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2128 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2129 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2130 if (flg) { 2131 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2132 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2133 } 2134 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2135 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2136 PetscCall(MatDestroy(&F->schur)); 2137 PetscCall(MatMumpsResetSchur_Private(mumps)); 2138 } 2139 2140 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2141 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2142 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2143 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2144 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2145 In short, we could not use distributed RHS with MPICH until v4.0b1. 2146 */ 2147 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2148 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 2149 #else 2150 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 2151 #endif 2152 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2153 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2154 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2155 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2156 #endif 2157 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2158 2159 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2160 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2161 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2162 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2163 2164 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2165 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2166 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2167 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2168 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2169 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2170 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2171 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2172 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2173 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2174 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2175 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2176 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2177 2178 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2179 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2180 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2181 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2182 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2183 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2184 2185 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2186 2187 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2188 if (ninfo) { 2189 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2190 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2191 mumps->ninfo = ninfo; 2192 for (i = 0; i < ninfo; i++) { 2193 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2194 mumps->info[i] = info[i]; 2195 } 2196 } 2197 PetscOptionsEnd(); 2198 PetscFunctionReturn(PETSC_SUCCESS); 2199 } 2200 2201 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2202 { 2203 PetscFunctionBegin; 2204 if (mumps->id.INFOG(1) < 0) { 2205 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2206 if (mumps->id.INFOG(1) == -6) { 2207 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2208 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2209 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2210 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2211 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2212 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2213 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2214 } else { 2215 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2216 F->factorerrortype = MAT_FACTOR_OTHER; 2217 } 2218 } 2219 PetscFunctionReturn(PETSC_SUCCESS); 2220 } 2221 2222 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2223 { 2224 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2225 Vec b; 2226 const PetscInt M = A->rmap->N; 2227 2228 PetscFunctionBegin; 2229 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2230 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2231 PetscFunctionReturn(PETSC_SUCCESS); 2232 } 2233 2234 /* Set MUMPS options from the options database */ 2235 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2236 2237 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2238 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2239 2240 /* analysis phase */ 2241 mumps->id.job = JOB_FACTSYMBOLIC; 2242 mumps->id.n = M; 2243 switch (mumps->id.ICNTL(18)) { 2244 case 0: /* centralized assembled matrix input */ 2245 if (!mumps->myid) { 2246 mumps->id.nnz = mumps->nnz; 2247 mumps->id.irn = mumps->irn; 2248 mumps->id.jcn = mumps->jcn; 2249 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2250 if (r) { 2251 mumps->id.ICNTL(7) = 1; 2252 if (!mumps->myid) { 2253 const PetscInt *idx; 2254 PetscInt i; 2255 2256 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2257 PetscCall(ISGetIndices(r, &idx)); 2258 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2259 PetscCall(ISRestoreIndices(r, &idx)); 2260 } 2261 } 2262 } 2263 break; 2264 case 3: /* distributed assembled matrix input (size>1) */ 2265 mumps->id.nnz_loc = mumps->nnz; 2266 mumps->id.irn_loc = mumps->irn; 2267 mumps->id.jcn_loc = mumps->jcn; 2268 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2269 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2270 PetscCall(MatCreateVecs(A, NULL, &b)); 2271 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2272 PetscCall(VecDestroy(&b)); 2273 } 2274 break; 2275 } 2276 PetscMUMPS_c(mumps); 2277 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2278 2279 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2280 F->ops->solve = MatSolve_MUMPS; 2281 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2282 F->ops->matsolve = MatMatSolve_MUMPS; 2283 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2284 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2285 2286 mumps->matstruc = SAME_NONZERO_PATTERN; 2287 PetscFunctionReturn(PETSC_SUCCESS); 2288 } 2289 2290 /* Note the Petsc r and c permutations are ignored */ 2291 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2292 { 2293 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2294 Vec b; 2295 const PetscInt M = A->rmap->N; 2296 2297 PetscFunctionBegin; 2298 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2299 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2300 PetscFunctionReturn(PETSC_SUCCESS); 2301 } 2302 2303 /* Set MUMPS options from the options database */ 2304 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2305 2306 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2307 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2308 2309 /* analysis phase */ 2310 mumps->id.job = JOB_FACTSYMBOLIC; 2311 mumps->id.n = M; 2312 switch (mumps->id.ICNTL(18)) { 2313 case 0: /* centralized assembled matrix input */ 2314 if (!mumps->myid) { 2315 mumps->id.nnz = mumps->nnz; 2316 mumps->id.irn = mumps->irn; 2317 mumps->id.jcn = mumps->jcn; 2318 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2319 } 2320 break; 2321 case 3: /* distributed assembled matrix input (size>1) */ 2322 mumps->id.nnz_loc = mumps->nnz; 2323 mumps->id.irn_loc = mumps->irn; 2324 mumps->id.jcn_loc = mumps->jcn; 2325 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2326 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2327 PetscCall(MatCreateVecs(A, NULL, &b)); 2328 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2329 PetscCall(VecDestroy(&b)); 2330 } 2331 break; 2332 } 2333 PetscMUMPS_c(mumps); 2334 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2335 2336 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2337 F->ops->solve = MatSolve_MUMPS; 2338 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2339 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2340 2341 mumps->matstruc = SAME_NONZERO_PATTERN; 2342 PetscFunctionReturn(PETSC_SUCCESS); 2343 } 2344 2345 /* Note the Petsc r permutation and factor info are ignored */ 2346 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2347 { 2348 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2349 Vec b; 2350 const PetscInt M = A->rmap->N; 2351 2352 PetscFunctionBegin; 2353 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2354 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2355 PetscFunctionReturn(PETSC_SUCCESS); 2356 } 2357 2358 /* Set MUMPS options from the options database */ 2359 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2360 2361 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2362 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2363 2364 /* analysis phase */ 2365 mumps->id.job = JOB_FACTSYMBOLIC; 2366 mumps->id.n = M; 2367 switch (mumps->id.ICNTL(18)) { 2368 case 0: /* centralized assembled matrix input */ 2369 if (!mumps->myid) { 2370 mumps->id.nnz = mumps->nnz; 2371 mumps->id.irn = mumps->irn; 2372 mumps->id.jcn = mumps->jcn; 2373 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2374 } 2375 break; 2376 case 3: /* distributed assembled matrix input (size>1) */ 2377 mumps->id.nnz_loc = mumps->nnz; 2378 mumps->id.irn_loc = mumps->irn; 2379 mumps->id.jcn_loc = mumps->jcn; 2380 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2381 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2382 PetscCall(MatCreateVecs(A, NULL, &b)); 2383 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2384 PetscCall(VecDestroy(&b)); 2385 } 2386 break; 2387 } 2388 PetscMUMPS_c(mumps); 2389 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2390 2391 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2392 F->ops->solve = MatSolve_MUMPS; 2393 F->ops->solvetranspose = MatSolve_MUMPS; 2394 F->ops->matsolve = MatMatSolve_MUMPS; 2395 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2396 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2397 #if defined(PETSC_USE_COMPLEX) 2398 F->ops->getinertia = NULL; 2399 #else 2400 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2401 #endif 2402 2403 mumps->matstruc = SAME_NONZERO_PATTERN; 2404 PetscFunctionReturn(PETSC_SUCCESS); 2405 } 2406 2407 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2408 { 2409 PetscBool iascii; 2410 PetscViewerFormat format; 2411 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2412 2413 PetscFunctionBegin; 2414 /* check if matrix is mumps type */ 2415 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2416 2417 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2418 if (iascii) { 2419 PetscCall(PetscViewerGetFormat(viewer, &format)); 2420 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2421 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2422 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2423 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2424 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2425 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2426 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2427 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2428 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2429 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2430 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2431 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2432 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2433 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2434 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2435 if (mumps->id.ICNTL(11) > 0) { 2436 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2437 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2438 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2439 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2440 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2441 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2442 } 2443 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2444 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2445 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2446 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2447 /* ICNTL(15-17) not used */ 2448 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2449 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2450 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2451 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2452 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2453 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2454 2455 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2456 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2457 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2458 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2459 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2460 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2461 2462 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2463 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2464 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2465 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2466 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2467 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2468 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2469 2470 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2471 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2472 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2473 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2474 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2475 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2476 2477 /* information local to each processor */ 2478 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2479 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2480 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2481 PetscCall(PetscViewerFlush(viewer)); 2482 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2483 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2484 PetscCall(PetscViewerFlush(viewer)); 2485 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2486 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2487 PetscCall(PetscViewerFlush(viewer)); 2488 2489 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2490 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2491 PetscCall(PetscViewerFlush(viewer)); 2492 2493 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2494 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2495 PetscCall(PetscViewerFlush(viewer)); 2496 2497 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2498 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2499 PetscCall(PetscViewerFlush(viewer)); 2500 2501 if (mumps->ninfo && mumps->ninfo <= 80) { 2502 PetscInt i; 2503 for (i = 0; i < mumps->ninfo; i++) { 2504 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2505 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2506 PetscCall(PetscViewerFlush(viewer)); 2507 } 2508 } 2509 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2510 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : "")); 2511 2512 if (mumps->myid == 0) { /* information from the host */ 2513 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2514 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2515 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2516 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2517 2518 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2519 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2520 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2521 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2522 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2523 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2524 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2525 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2526 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2527 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2528 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2529 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2530 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2531 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2532 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2533 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2534 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2535 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 2536 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2537 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2538 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2539 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2540 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2541 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2542 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2543 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2544 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2545 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2546 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2547 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2548 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2549 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2550 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2551 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2552 } 2553 } 2554 } 2555 PetscFunctionReturn(PETSC_SUCCESS); 2556 } 2557 2558 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2559 { 2560 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2561 2562 PetscFunctionBegin; 2563 info->block_size = 1.0; 2564 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2565 info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2566 info->nz_unneeded = 0.0; 2567 info->assemblies = 0.0; 2568 info->mallocs = 0.0; 2569 info->memory = 0.0; 2570 info->fill_ratio_given = 0; 2571 info->fill_ratio_needed = 0; 2572 info->factor_mallocs = 0; 2573 PetscFunctionReturn(PETSC_SUCCESS); 2574 } 2575 2576 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2577 { 2578 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2579 const PetscScalar *arr; 2580 const PetscInt *idxs; 2581 PetscInt size, i; 2582 2583 PetscFunctionBegin; 2584 PetscCall(ISGetLocalSize(is, &size)); 2585 /* Schur complement matrix */ 2586 PetscCall(MatDestroy(&F->schur)); 2587 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2588 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2589 mumps->id.schur = (MumpsScalar *)arr; 2590 mumps->id.size_schur = size; 2591 mumps->id.schur_lld = size; 2592 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2593 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2594 2595 /* MUMPS expects Fortran style indices */ 2596 PetscCall(PetscFree(mumps->id.listvar_schur)); 2597 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2598 PetscCall(ISGetIndices(is, &idxs)); 2599 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2600 PetscCall(ISRestoreIndices(is, &idxs)); 2601 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2602 mumps->id.ICNTL(26) = -1; 2603 PetscFunctionReturn(PETSC_SUCCESS); 2604 } 2605 2606 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2607 { 2608 Mat St; 2609 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2610 PetscScalar *array; 2611 2612 PetscFunctionBegin; 2613 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it"); 2614 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2615 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2616 PetscCall(MatSetType(St, MATDENSE)); 2617 PetscCall(MatSetUp(St)); 2618 PetscCall(MatDenseGetArray(St, &array)); 2619 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2620 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2621 PetscInt i, j, N = mumps->id.size_schur; 2622 for (i = 0; i < N; i++) { 2623 for (j = 0; j < N; j++) { 2624 #if !defined(PETSC_USE_COMPLEX) 2625 PetscScalar val = mumps->id.schur[i * N + j]; 2626 #else 2627 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2628 #endif 2629 array[j * N + i] = val; 2630 } 2631 } 2632 } else { /* stored by columns */ 2633 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2634 } 2635 } else { /* either full or lower-triangular (not packed) */ 2636 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2637 PetscInt i, j, N = mumps->id.size_schur; 2638 for (i = 0; i < N; i++) { 2639 for (j = i; j < N; j++) { 2640 #if !defined(PETSC_USE_COMPLEX) 2641 PetscScalar val = mumps->id.schur[i * N + j]; 2642 #else 2643 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2644 #endif 2645 array[i * N + j] = array[j * N + i] = val; 2646 } 2647 } 2648 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2649 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2650 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2651 PetscInt i, j, N = mumps->id.size_schur; 2652 for (i = 0; i < N; i++) { 2653 for (j = 0; j < i + 1; j++) { 2654 #if !defined(PETSC_USE_COMPLEX) 2655 PetscScalar val = mumps->id.schur[i * N + j]; 2656 #else 2657 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2658 #endif 2659 array[i * N + j] = array[j * N + i] = val; 2660 } 2661 } 2662 } 2663 } 2664 PetscCall(MatDenseRestoreArray(St, &array)); 2665 *S = St; 2666 PetscFunctionReturn(PETSC_SUCCESS); 2667 } 2668 2669 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2670 { 2671 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2672 2673 PetscFunctionBegin; 2674 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2675 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2676 for (i = 0; i < nICNTL_pre; ++i) 2677 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2678 if (i == nICNTL_pre) { /* not already cached */ 2679 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2680 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2681 mumps->ICNTL_pre[0]++; 2682 } 2683 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2684 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2685 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2686 PetscFunctionReturn(PETSC_SUCCESS); 2687 } 2688 2689 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2690 { 2691 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2692 2693 PetscFunctionBegin; 2694 if (mumps->id.job == JOB_NULL) { 2695 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2696 *ival = 0; 2697 for (i = 0; i < nICNTL_pre; ++i) { 2698 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2699 } 2700 } else *ival = mumps->id.ICNTL(icntl); 2701 PetscFunctionReturn(PETSC_SUCCESS); 2702 } 2703 2704 /*@ 2705 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2706 2707 Logically Collective 2708 2709 Input Parameters: 2710 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2711 . icntl - index of MUMPS parameter array ICNTL() 2712 - ival - value of MUMPS ICNTL(icntl) 2713 2714 Options Database Key: 2715 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2716 2717 Level: beginner 2718 2719 References: 2720 . * - MUMPS Users' Guide 2721 2722 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2723 @*/ 2724 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2725 { 2726 PetscFunctionBegin; 2727 PetscValidType(F, 1); 2728 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2729 PetscValidLogicalCollectiveInt(F, icntl, 2); 2730 PetscValidLogicalCollectiveInt(F, ival, 3); 2731 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2732 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2733 PetscFunctionReturn(PETSC_SUCCESS); 2734 } 2735 2736 /*@ 2737 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2738 2739 Logically Collective 2740 2741 Input Parameters: 2742 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2743 - icntl - index of MUMPS parameter array ICNTL() 2744 2745 Output Parameter: 2746 . ival - value of MUMPS ICNTL(icntl) 2747 2748 Level: beginner 2749 2750 References: 2751 . * - MUMPS Users' Guide 2752 2753 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2754 @*/ 2755 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2756 { 2757 PetscFunctionBegin; 2758 PetscValidType(F, 1); 2759 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2760 PetscValidLogicalCollectiveInt(F, icntl, 2); 2761 PetscAssertPointer(ival, 3); 2762 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2763 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2764 PetscFunctionReturn(PETSC_SUCCESS); 2765 } 2766 2767 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2768 { 2769 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2770 2771 PetscFunctionBegin; 2772 if (mumps->id.job == JOB_NULL) { 2773 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2774 for (i = 0; i < nCNTL_pre; ++i) 2775 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2776 if (i == nCNTL_pre) { 2777 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2778 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2779 mumps->CNTL_pre[0]++; 2780 } 2781 mumps->CNTL_pre[1 + 2 * i] = icntl; 2782 mumps->CNTL_pre[2 + 2 * i] = val; 2783 } else mumps->id.CNTL(icntl) = val; 2784 PetscFunctionReturn(PETSC_SUCCESS); 2785 } 2786 2787 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2788 { 2789 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2790 2791 PetscFunctionBegin; 2792 if (mumps->id.job == JOB_NULL) { 2793 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2794 *val = 0.0; 2795 for (i = 0; i < nCNTL_pre; ++i) { 2796 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2797 } 2798 } else *val = mumps->id.CNTL(icntl); 2799 PetscFunctionReturn(PETSC_SUCCESS); 2800 } 2801 2802 /*@ 2803 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2804 2805 Logically Collective 2806 2807 Input Parameters: 2808 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2809 . icntl - index of MUMPS parameter array CNTL() 2810 - val - value of MUMPS CNTL(icntl) 2811 2812 Options Database Key: 2813 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2814 2815 Level: beginner 2816 2817 References: 2818 . * - MUMPS Users' Guide 2819 2820 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2821 @*/ 2822 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2823 { 2824 PetscFunctionBegin; 2825 PetscValidType(F, 1); 2826 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2827 PetscValidLogicalCollectiveInt(F, icntl, 2); 2828 PetscValidLogicalCollectiveReal(F, val, 3); 2829 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2830 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2831 PetscFunctionReturn(PETSC_SUCCESS); 2832 } 2833 2834 /*@ 2835 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2836 2837 Logically Collective 2838 2839 Input Parameters: 2840 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2841 - icntl - index of MUMPS parameter array CNTL() 2842 2843 Output Parameter: 2844 . val - value of MUMPS CNTL(icntl) 2845 2846 Level: beginner 2847 2848 References: 2849 . * - MUMPS Users' Guide 2850 2851 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2852 @*/ 2853 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2854 { 2855 PetscFunctionBegin; 2856 PetscValidType(F, 1); 2857 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2858 PetscValidLogicalCollectiveInt(F, icntl, 2); 2859 PetscAssertPointer(val, 3); 2860 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2861 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2862 PetscFunctionReturn(PETSC_SUCCESS); 2863 } 2864 2865 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2866 { 2867 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2868 2869 PetscFunctionBegin; 2870 *info = mumps->id.INFO(icntl); 2871 PetscFunctionReturn(PETSC_SUCCESS); 2872 } 2873 2874 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2875 { 2876 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2877 2878 PetscFunctionBegin; 2879 *infog = mumps->id.INFOG(icntl); 2880 PetscFunctionReturn(PETSC_SUCCESS); 2881 } 2882 2883 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2884 { 2885 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2886 2887 PetscFunctionBegin; 2888 *rinfo = mumps->id.RINFO(icntl); 2889 PetscFunctionReturn(PETSC_SUCCESS); 2890 } 2891 2892 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2893 { 2894 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2895 2896 PetscFunctionBegin; 2897 *rinfog = mumps->id.RINFOG(icntl); 2898 PetscFunctionReturn(PETSC_SUCCESS); 2899 } 2900 2901 PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2902 { 2903 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2904 2905 PetscFunctionBegin; 2906 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2907 *size = 0; 2908 *array = NULL; 2909 if (!mumps->myid) { 2910 *size = mumps->id.INFOG(28); 2911 PetscCall(PetscMalloc1(*size, array)); 2912 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2913 } 2914 PetscFunctionReturn(PETSC_SUCCESS); 2915 } 2916 2917 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2918 { 2919 Mat Bt = NULL, Btseq = NULL; 2920 PetscBool flg; 2921 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2922 PetscScalar *aa; 2923 PetscInt spnr, *ia, *ja, M, nrhs; 2924 2925 PetscFunctionBegin; 2926 PetscAssertPointer(spRHS, 2); 2927 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2928 if (flg) { 2929 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2930 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2931 2932 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2933 2934 if (mumps->petsc_size > 1) { 2935 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2936 Btseq = b->A; 2937 } else { 2938 Btseq = Bt; 2939 } 2940 2941 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2942 mumps->id.nrhs = nrhs; 2943 mumps->id.lrhs = M; 2944 mumps->id.rhs = NULL; 2945 2946 if (!mumps->myid) { 2947 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2948 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2949 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2950 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2951 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2952 } else { 2953 mumps->id.irhs_ptr = NULL; 2954 mumps->id.irhs_sparse = NULL; 2955 mumps->id.nz_rhs = 0; 2956 mumps->id.rhs_sparse = NULL; 2957 } 2958 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2959 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2960 2961 /* solve phase */ 2962 mumps->id.job = JOB_SOLVE; 2963 PetscMUMPS_c(mumps); 2964 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2965 2966 if (!mumps->myid) { 2967 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2968 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2969 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2970 } 2971 PetscFunctionReturn(PETSC_SUCCESS); 2972 } 2973 2974 /*@ 2975 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2976 2977 Logically Collective 2978 2979 Input Parameter: 2980 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2981 2982 Output Parameter: 2983 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2984 2985 Level: beginner 2986 2987 References: 2988 . * - MUMPS Users' Guide 2989 2990 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2991 @*/ 2992 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2993 { 2994 PetscFunctionBegin; 2995 PetscValidType(F, 1); 2996 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2997 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2998 PetscFunctionReturn(PETSC_SUCCESS); 2999 } 3000 3001 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3002 { 3003 Mat spRHS; 3004 3005 PetscFunctionBegin; 3006 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3007 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3008 PetscCall(MatDestroy(&spRHS)); 3009 PetscFunctionReturn(PETSC_SUCCESS); 3010 } 3011 3012 /*@ 3013 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 3014 3015 Logically Collective 3016 3017 Input Parameter: 3018 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3019 3020 Output Parameter: 3021 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3022 3023 Level: beginner 3024 3025 References: 3026 . * - MUMPS Users' Guide 3027 3028 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3029 @*/ 3030 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3031 { 3032 PetscBool flg; 3033 3034 PetscFunctionBegin; 3035 PetscValidType(F, 1); 3036 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3037 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3038 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3039 3040 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3041 PetscFunctionReturn(PETSC_SUCCESS); 3042 } 3043 3044 /*@ 3045 MatMumpsGetInfo - Get MUMPS parameter INFO() 3046 3047 Logically Collective 3048 3049 Input Parameters: 3050 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3051 - icntl - index of MUMPS parameter array INFO() 3052 3053 Output Parameter: 3054 . ival - value of MUMPS INFO(icntl) 3055 3056 Level: beginner 3057 3058 References: 3059 . * - MUMPS Users' Guide 3060 3061 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3062 @*/ 3063 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3064 { 3065 PetscFunctionBegin; 3066 PetscValidType(F, 1); 3067 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3068 PetscAssertPointer(ival, 3); 3069 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3070 PetscFunctionReturn(PETSC_SUCCESS); 3071 } 3072 3073 /*@ 3074 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3075 3076 Logically Collective 3077 3078 Input Parameters: 3079 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3080 - icntl - index of MUMPS parameter array INFOG() 3081 3082 Output Parameter: 3083 . ival - value of MUMPS INFOG(icntl) 3084 3085 Level: beginner 3086 3087 References: 3088 . * - MUMPS Users' Guide 3089 3090 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3091 @*/ 3092 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3093 { 3094 PetscFunctionBegin; 3095 PetscValidType(F, 1); 3096 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3097 PetscAssertPointer(ival, 3); 3098 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3099 PetscFunctionReturn(PETSC_SUCCESS); 3100 } 3101 3102 /*@ 3103 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3104 3105 Logically Collective 3106 3107 Input Parameters: 3108 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3109 - icntl - index of MUMPS parameter array RINFO() 3110 3111 Output Parameter: 3112 . val - value of MUMPS RINFO(icntl) 3113 3114 Level: beginner 3115 3116 References: 3117 . * - MUMPS Users' Guide 3118 3119 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3120 @*/ 3121 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3122 { 3123 PetscFunctionBegin; 3124 PetscValidType(F, 1); 3125 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3126 PetscAssertPointer(val, 3); 3127 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3128 PetscFunctionReturn(PETSC_SUCCESS); 3129 } 3130 3131 /*@ 3132 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3133 3134 Logically Collective 3135 3136 Input Parameters: 3137 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3138 - icntl - index of MUMPS parameter array RINFOG() 3139 3140 Output Parameter: 3141 . val - value of MUMPS RINFOG(icntl) 3142 3143 Level: beginner 3144 3145 References: 3146 . * - MUMPS Users' Guide 3147 3148 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3149 @*/ 3150 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3151 { 3152 PetscFunctionBegin; 3153 PetscValidType(F, 1); 3154 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3155 PetscAssertPointer(val, 3); 3156 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3157 PetscFunctionReturn(PETSC_SUCCESS); 3158 } 3159 3160 /*@ 3161 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3162 3163 Logically Collective 3164 3165 Input Parameter: 3166 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3167 3168 Output Parameters: 3169 + size - local size of the array. The size of the array is non-zero only on the host. 3170 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3171 for freeing this array. 3172 3173 Level: beginner 3174 3175 References: 3176 . * - MUMPS Users' Guide 3177 3178 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3179 @*/ 3180 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3181 { 3182 PetscFunctionBegin; 3183 PetscValidType(F, 1); 3184 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3185 PetscAssertPointer(size, 2); 3186 PetscAssertPointer(array, 3); 3187 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3188 PetscFunctionReturn(PETSC_SUCCESS); 3189 } 3190 3191 /*MC 3192 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3193 distributed and sequential matrices via the external package MUMPS. 3194 3195 Works with `MATAIJ` and `MATSBAIJ` matrices 3196 3197 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3198 3199 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3200 See details below. 3201 3202 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3203 3204 Options Database Keys: 3205 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3206 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 3207 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3208 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3209 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3210 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3211 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3212 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3213 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3214 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3215 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3216 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3217 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3218 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3219 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3220 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3221 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3222 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3223 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3224 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3225 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3226 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 3227 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3228 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3229 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3230 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3231 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3232 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3233 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3234 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3235 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3236 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3237 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3238 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3239 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3240 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3241 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3242 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3243 3244 Level: beginner 3245 3246 Notes: 3247 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 3248 error if the matrix is Hermitian. 3249 3250 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3251 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3252 3253 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3254 the failure with 3255 .vb 3256 KSPGetPC(ksp,&pc); 3257 PCFactorGetMatrix(pc,&mat); 3258 MatMumpsGetInfo(mat,....); 3259 MatMumpsGetInfog(mat,....); etc. 3260 .ve 3261 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3262 3263 MUMPS provides 64-bit integer support in two build modes: 3264 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3265 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3266 3267 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3268 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3269 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3270 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3271 3272 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3273 3274 Two modes to run MUMPS/PETSc with OpenMP 3275 .vb 3276 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3277 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3278 .ve 3279 3280 .vb 3281 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3282 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3283 .ve 3284 3285 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3286 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3287 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3288 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3289 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 3290 3291 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3292 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3293 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3294 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 3295 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3296 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 3297 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3298 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3299 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3300 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3301 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3302 examine the mapping result. 3303 3304 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3305 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3306 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3307 3308 References: 3309 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3310 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 3311 3312 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3313 M*/ 3314 3315 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3316 { 3317 PetscFunctionBegin; 3318 *type = MATSOLVERMUMPS; 3319 PetscFunctionReturn(PETSC_SUCCESS); 3320 } 3321 3322 /* MatGetFactor for Seq and MPI AIJ matrices */ 3323 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3324 { 3325 Mat B; 3326 Mat_MUMPS *mumps; 3327 PetscBool isSeqAIJ; 3328 PetscMPIInt size; 3329 3330 PetscFunctionBegin; 3331 #if defined(PETSC_USE_COMPLEX) 3332 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3333 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3334 *F = NULL; 3335 PetscFunctionReturn(PETSC_SUCCESS); 3336 } 3337 #endif 3338 /* Create the factorization matrix */ 3339 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3340 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3341 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3342 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3343 PetscCall(MatSetUp(B)); 3344 3345 PetscCall(PetscNew(&mumps)); 3346 3347 B->ops->view = MatView_MUMPS; 3348 B->ops->getinfo = MatGetInfo_MUMPS; 3349 3350 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3351 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3352 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3353 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3354 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3355 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3356 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3357 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3358 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3359 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3360 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3361 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3362 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3363 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3364 3365 if (ftype == MAT_FACTOR_LU) { 3366 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3367 B->factortype = MAT_FACTOR_LU; 3368 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3369 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3370 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3371 mumps->sym = 0; 3372 } else { 3373 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3374 B->factortype = MAT_FACTOR_CHOLESKY; 3375 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3376 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3377 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3378 #if defined(PETSC_USE_COMPLEX) 3379 mumps->sym = 2; 3380 #else 3381 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3382 else mumps->sym = 2; 3383 #endif 3384 } 3385 3386 /* set solvertype */ 3387 PetscCall(PetscFree(B->solvertype)); 3388 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3389 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3390 if (size == 1) { 3391 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3392 B->canuseordering = PETSC_TRUE; 3393 } 3394 B->ops->destroy = MatDestroy_MUMPS; 3395 B->data = (void *)mumps; 3396 3397 *F = B; 3398 mumps->id.job = JOB_NULL; 3399 mumps->ICNTL_pre = NULL; 3400 mumps->CNTL_pre = NULL; 3401 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3402 PetscFunctionReturn(PETSC_SUCCESS); 3403 } 3404 3405 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3406 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3407 { 3408 Mat B; 3409 Mat_MUMPS *mumps; 3410 PetscBool isSeqSBAIJ; 3411 PetscMPIInt size; 3412 3413 PetscFunctionBegin; 3414 #if defined(PETSC_USE_COMPLEX) 3415 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3416 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3417 *F = NULL; 3418 PetscFunctionReturn(PETSC_SUCCESS); 3419 } 3420 #endif 3421 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3422 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3423 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3424 PetscCall(MatSetUp(B)); 3425 3426 PetscCall(PetscNew(&mumps)); 3427 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3428 if (isSeqSBAIJ) { 3429 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3430 } else { 3431 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3432 } 3433 3434 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3435 B->ops->view = MatView_MUMPS; 3436 B->ops->getinfo = MatGetInfo_MUMPS; 3437 3438 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3439 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3440 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3441 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3442 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3443 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3444 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3445 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3446 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3447 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3448 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3449 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3450 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3451 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3452 3453 B->factortype = MAT_FACTOR_CHOLESKY; 3454 #if defined(PETSC_USE_COMPLEX) 3455 mumps->sym = 2; 3456 #else 3457 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3458 else mumps->sym = 2; 3459 #endif 3460 3461 /* set solvertype */ 3462 PetscCall(PetscFree(B->solvertype)); 3463 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3464 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3465 if (size == 1) { 3466 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3467 B->canuseordering = PETSC_TRUE; 3468 } 3469 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3470 B->ops->destroy = MatDestroy_MUMPS; 3471 B->data = (void *)mumps; 3472 3473 *F = B; 3474 mumps->id.job = JOB_NULL; 3475 mumps->ICNTL_pre = NULL; 3476 mumps->CNTL_pre = NULL; 3477 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3478 PetscFunctionReturn(PETSC_SUCCESS); 3479 } 3480 3481 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3482 { 3483 Mat B; 3484 Mat_MUMPS *mumps; 3485 PetscBool isSeqBAIJ; 3486 PetscMPIInt size; 3487 3488 PetscFunctionBegin; 3489 /* Create the factorization matrix */ 3490 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3491 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3492 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3493 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3494 PetscCall(MatSetUp(B)); 3495 3496 PetscCall(PetscNew(&mumps)); 3497 if (ftype == MAT_FACTOR_LU) { 3498 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3499 B->factortype = MAT_FACTOR_LU; 3500 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3501 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3502 mumps->sym = 0; 3503 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3504 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3505 3506 B->ops->view = MatView_MUMPS; 3507 B->ops->getinfo = MatGetInfo_MUMPS; 3508 3509 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3510 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3511 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3512 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3513 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3514 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3515 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3516 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3517 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3518 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3519 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3520 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3521 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3522 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3523 3524 /* set solvertype */ 3525 PetscCall(PetscFree(B->solvertype)); 3526 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3527 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3528 if (size == 1) { 3529 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3530 B->canuseordering = PETSC_TRUE; 3531 } 3532 B->ops->destroy = MatDestroy_MUMPS; 3533 B->data = (void *)mumps; 3534 3535 *F = B; 3536 mumps->id.job = JOB_NULL; 3537 mumps->ICNTL_pre = NULL; 3538 mumps->CNTL_pre = NULL; 3539 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3540 PetscFunctionReturn(PETSC_SUCCESS); 3541 } 3542 3543 /* MatGetFactor for Seq and MPI SELL matrices */ 3544 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3545 { 3546 Mat B; 3547 Mat_MUMPS *mumps; 3548 PetscBool isSeqSELL; 3549 PetscMPIInt size; 3550 3551 PetscFunctionBegin; 3552 /* Create the factorization matrix */ 3553 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3554 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3555 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3556 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3557 PetscCall(MatSetUp(B)); 3558 3559 PetscCall(PetscNew(&mumps)); 3560 3561 B->ops->view = MatView_MUMPS; 3562 B->ops->getinfo = MatGetInfo_MUMPS; 3563 3564 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3565 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3566 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3567 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3568 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3569 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3570 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3571 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3572 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3573 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3574 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3575 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3576 3577 if (ftype == MAT_FACTOR_LU) { 3578 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3579 B->factortype = MAT_FACTOR_LU; 3580 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3581 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3582 mumps->sym = 0; 3583 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3584 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3585 3586 /* set solvertype */ 3587 PetscCall(PetscFree(B->solvertype)); 3588 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3589 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3590 if (size == 1) { 3591 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3592 B->canuseordering = PETSC_TRUE; 3593 } 3594 B->ops->destroy = MatDestroy_MUMPS; 3595 B->data = (void *)mumps; 3596 3597 *F = B; 3598 mumps->id.job = JOB_NULL; 3599 mumps->ICNTL_pre = NULL; 3600 mumps->CNTL_pre = NULL; 3601 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3602 PetscFunctionReturn(PETSC_SUCCESS); 3603 } 3604 3605 /* MatGetFactor for MATNEST matrices */ 3606 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3607 { 3608 Mat B, **mats; 3609 Mat_MUMPS *mumps; 3610 PetscInt nr, nc; 3611 PetscMPIInt size; 3612 PetscBool flg = PETSC_TRUE; 3613 3614 PetscFunctionBegin; 3615 #if defined(PETSC_USE_COMPLEX) 3616 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3617 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3618 *F = NULL; 3619 PetscFunctionReturn(PETSC_SUCCESS); 3620 } 3621 #endif 3622 3623 /* Return if some condition is not satisfied */ 3624 *F = NULL; 3625 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3626 if (ftype == MAT_FACTOR_CHOLESKY) { 3627 IS *rows, *cols; 3628 PetscInt *m, *M; 3629 3630 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3631 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3632 PetscCall(MatNestGetISs(A, rows, cols)); 3633 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3634 if (!flg) { 3635 PetscCall(PetscFree2(rows, cols)); 3636 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 3637 PetscFunctionReturn(PETSC_SUCCESS); 3638 } 3639 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3640 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3641 for (PetscInt r = 0; flg && r < nr; r++) 3642 for (PetscInt k = r + 1; flg && k < nr; k++) 3643 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3644 PetscCall(PetscFree2(m, M)); 3645 PetscCall(PetscFree2(rows, cols)); 3646 if (!flg) { 3647 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 3648 PetscFunctionReturn(PETSC_SUCCESS); 3649 } 3650 } 3651 3652 for (PetscInt r = 0; r < nr; r++) { 3653 for (PetscInt c = 0; c < nc; c++) { 3654 Mat sub = mats[r][c]; 3655 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans; 3656 3657 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3658 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 3659 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 3660 else { 3661 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 3662 if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 3663 } 3664 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3665 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3666 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3667 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3668 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3669 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3670 if (ftype == MAT_FACTOR_CHOLESKY) { 3671 if (r == c && !isSeqAIJ && !isMPIAIJ && !isSeqSBAIJ && !isMPISBAIJ) { 3672 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3673 flg = PETSC_FALSE; 3674 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3675 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3676 flg = PETSC_FALSE; 3677 } 3678 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3679 PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3680 flg = PETSC_FALSE; 3681 } 3682 } 3683 } 3684 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3685 3686 /* Create the factorization matrix */ 3687 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3688 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3689 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3690 PetscCall(MatSetUp(B)); 3691 3692 PetscCall(PetscNew(&mumps)); 3693 3694 B->ops->view = MatView_MUMPS; 3695 B->ops->getinfo = MatGetInfo_MUMPS; 3696 3697 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3698 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3699 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3700 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3701 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3702 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3703 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3704 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3705 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3706 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3707 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3708 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3709 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3710 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3711 3712 if (ftype == MAT_FACTOR_LU) { 3713 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3714 B->factortype = MAT_FACTOR_LU; 3715 mumps->sym = 0; 3716 } else { 3717 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3718 B->factortype = MAT_FACTOR_CHOLESKY; 3719 #if defined(PETSC_USE_COMPLEX) 3720 mumps->sym = 2; 3721 #else 3722 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3723 else mumps->sym = 2; 3724 #endif 3725 } 3726 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3727 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3728 3729 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3730 if (size == 1) { 3731 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3732 B->canuseordering = PETSC_TRUE; 3733 } 3734 3735 /* set solvertype */ 3736 PetscCall(PetscFree(B->solvertype)); 3737 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3738 B->ops->destroy = MatDestroy_MUMPS; 3739 B->data = (void *)mumps; 3740 3741 *F = B; 3742 mumps->id.job = JOB_NULL; 3743 mumps->ICNTL_pre = NULL; 3744 mumps->CNTL_pre = NULL; 3745 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3746 PetscFunctionReturn(PETSC_SUCCESS); 3747 } 3748 3749 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3750 { 3751 PetscFunctionBegin; 3752 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3753 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3754 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3755 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3756 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3757 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3758 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3759 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3760 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3761 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3762 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3763 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3764 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 3765 PetscFunctionReturn(PETSC_SUCCESS); 3766 } 3767