1 /* 2 Provides an interface to the MUMPS sparse solver 3 */ 4 #include <petscpkg_version.h> 5 #include <petscsf.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 11 12 EXTERN_C_BEGIN 13 #if defined(PETSC_USE_COMPLEX) 14 #if defined(PETSC_USE_REAL_SINGLE) 15 #include <cmumps_c.h> 16 #else 17 #include <zmumps_c.h> 18 #endif 19 #else 20 #if defined(PETSC_USE_REAL_SINGLE) 21 #include <smumps_c.h> 22 #else 23 #include <dmumps_c.h> 24 #endif 25 #endif 26 EXTERN_C_END 27 #define JOB_INIT -1 28 #define JOB_NULL 0 29 #define JOB_FACTSYMBOLIC 1 30 #define JOB_FACTNUMERIC 2 31 #define JOB_SOLVE 3 32 #define JOB_END -2 33 34 /* calls to MUMPS */ 35 #if defined(PETSC_USE_COMPLEX) 36 #if defined(PETSC_USE_REAL_SINGLE) 37 #define MUMPS_c cmumps_c 38 #else 39 #define MUMPS_c zmumps_c 40 #endif 41 #else 42 #if defined(PETSC_USE_REAL_SINGLE) 43 #define MUMPS_c smumps_c 44 #else 45 #define MUMPS_c dmumps_c 46 #endif 47 #endif 48 49 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 50 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 51 naming convention in PetscMPIInt, PetscBLASInt etc. 52 */ 53 typedef MUMPS_INT PetscMUMPSInt; 54 55 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 56 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 57 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 58 #endif 59 #else 60 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 61 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 62 #endif 63 #endif 64 65 #define MPIU_MUMPSINT MPI_INT 66 #define PETSC_MUMPS_INT_MAX 2147483647 67 #define PETSC_MUMPS_INT_MIN -2147483648 68 69 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 70 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 71 { 72 PetscFunctionBegin; 73 #if PetscDefined(USE_64BIT_INDICES) 74 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 75 #endif 76 *b = (PetscMUMPSInt)(a); 77 PetscFunctionReturn(PETSC_SUCCESS); 78 } 79 80 /* Put these utility routines here since they are only used in this file */ 81 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 82 { 83 PetscInt myval; 84 PetscBool myset; 85 PetscFunctionBegin; 86 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 87 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 88 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 89 if (set) *set = myset; 90 PetscFunctionReturn(PETSC_SUCCESS); 91 } 92 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 93 94 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 95 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 96 #define PetscMUMPS_c(mumps) \ 97 do { \ 98 if (mumps->use_petsc_omp_support) { \ 99 if (mumps->is_omp_master) { \ 100 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 101 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 102 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 103 PetscCall(PetscFPTrapPop()); \ 104 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 105 } \ 106 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 107 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 108 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 109 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 110 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 111 */ \ 112 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 113 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 114 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 115 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 116 } else { \ 117 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 118 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 119 PetscCall(PetscFPTrapPop()); \ 120 } \ 121 } while (0) 122 #else 123 #define PetscMUMPS_c(mumps) \ 124 do { \ 125 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 126 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 127 PetscCall(PetscFPTrapPop()); \ 128 } while (0) 129 #endif 130 131 /* declare MumpsScalar */ 132 #if defined(PETSC_USE_COMPLEX) 133 #if defined(PETSC_USE_REAL_SINGLE) 134 #define MumpsScalar mumps_complex 135 #else 136 #define MumpsScalar mumps_double_complex 137 #endif 138 #else 139 #define MumpsScalar PetscScalar 140 #endif 141 142 /* macros s.t. indices match MUMPS documentation */ 143 #define ICNTL(I) icntl[(I)-1] 144 #define CNTL(I) cntl[(I)-1] 145 #define INFOG(I) infog[(I)-1] 146 #define INFO(I) info[(I)-1] 147 #define RINFOG(I) rinfog[(I)-1] 148 #define RINFO(I) rinfo[(I)-1] 149 150 typedef struct Mat_MUMPS Mat_MUMPS; 151 struct Mat_MUMPS { 152 #if defined(PETSC_USE_COMPLEX) 153 #if defined(PETSC_USE_REAL_SINGLE) 154 CMUMPS_STRUC_C id; 155 #else 156 ZMUMPS_STRUC_C id; 157 #endif 158 #else 159 #if defined(PETSC_USE_REAL_SINGLE) 160 SMUMPS_STRUC_C id; 161 #else 162 DMUMPS_STRUC_C id; 163 #endif 164 #endif 165 166 MatStructure matstruc; 167 PetscMPIInt myid, petsc_size; 168 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 169 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 170 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 171 PetscMUMPSInt sym; 172 MPI_Comm mumps_comm; 173 PetscMUMPSInt *ICNTL_pre; 174 PetscReal *CNTL_pre; 175 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 176 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 177 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 178 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 179 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 180 PetscInt *rhs_nrow, max_nrhs; 181 PetscMPIInt *rhs_recvcounts, *rhs_disps; 182 PetscScalar *rhs_loc, *rhs_recvbuf; 183 #endif 184 Vec b_seq, x_seq; 185 PetscInt ninfo, *info; /* which INFO to display */ 186 PetscInt sizeredrhs; 187 PetscScalar *schur_sol; 188 PetscInt schur_sizesol; 189 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 190 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 191 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 192 193 /* Support for MATNEST */ 194 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 195 PetscInt64 *nest_vals_start; 196 PetscScalar *nest_vals; 197 198 /* stuff used by petsc/mumps OpenMP support*/ 199 PetscBool use_petsc_omp_support; 200 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 201 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 202 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 203 PetscMPIInt tag, omp_comm_size; 204 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 205 MPI_Request *reqs; 206 }; 207 208 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 209 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 210 */ 211 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 212 { 213 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 214 215 PetscFunctionBegin; 216 #if defined(PETSC_USE_64BIT_INDICES) 217 { 218 PetscInt i; 219 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 220 PetscCall(PetscFree(mumps->ia_alloc)); 221 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 222 mumps->cur_ilen = nrow + 1; 223 } 224 if (nnz > mumps->cur_jlen) { 225 PetscCall(PetscFree(mumps->ja_alloc)); 226 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 227 mumps->cur_jlen = nnz; 228 } 229 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 230 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 231 *ia_mumps = mumps->ia_alloc; 232 *ja_mumps = mumps->ja_alloc; 233 } 234 #else 235 *ia_mumps = ia; 236 *ja_mumps = ja; 237 #endif 238 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 243 { 244 PetscFunctionBegin; 245 PetscCall(PetscFree(mumps->id.listvar_schur)); 246 PetscCall(PetscFree(mumps->id.redrhs)); 247 PetscCall(PetscFree(mumps->schur_sol)); 248 mumps->id.size_schur = 0; 249 mumps->id.schur_lld = 0; 250 mumps->id.ICNTL(19) = 0; 251 PetscFunctionReturn(PETSC_SUCCESS); 252 } 253 254 /* solve with rhs in mumps->id.redrhs and return in the same location */ 255 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 256 { 257 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 258 Mat S, B, X; 259 MatFactorSchurStatus schurstatus; 260 PetscInt sizesol; 261 262 PetscFunctionBegin; 263 PetscCall(MatFactorFactorizeSchurComplement(F)); 264 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 265 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 266 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 267 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 268 PetscCall(MatBindToCPU(B, S->boundtocpu)); 269 #endif 270 switch (schurstatus) { 271 case MAT_FACTOR_SCHUR_FACTORED: 272 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 273 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 274 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 275 PetscCall(MatBindToCPU(X, S->boundtocpu)); 276 #endif 277 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 278 PetscCall(MatMatSolveTranspose(S, B, X)); 279 } else { 280 PetscCall(MatMatSolve(S, B, X)); 281 } 282 break; 283 case MAT_FACTOR_SCHUR_INVERTED: 284 sizesol = mumps->id.nrhs * mumps->id.size_schur; 285 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 286 PetscCall(PetscFree(mumps->schur_sol)); 287 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 288 mumps->schur_sizesol = sizesol; 289 } 290 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 291 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 292 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 293 PetscCall(MatBindToCPU(X, S->boundtocpu)); 294 #endif 295 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 296 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 297 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 298 } else { 299 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 300 } 301 PetscCall(MatProductSetFromOptions(X)); 302 PetscCall(MatProductSymbolic(X)); 303 PetscCall(MatProductNumeric(X)); 304 305 PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 306 break; 307 default: 308 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 309 } 310 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 311 PetscCall(MatDestroy(&B)); 312 PetscCall(MatDestroy(&X)); 313 PetscFunctionReturn(PETSC_SUCCESS); 314 } 315 316 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 317 { 318 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 319 320 PetscFunctionBegin; 321 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 if (!expansion) { /* prepare for the condensation step */ 325 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 326 /* allocate MUMPS internal array to store reduced right-hand sides */ 327 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 328 PetscCall(PetscFree(mumps->id.redrhs)); 329 mumps->id.lredrhs = mumps->id.size_schur; 330 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 331 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 332 } 333 } else { /* prepare for the expansion step */ 334 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 335 PetscCall(MatMumpsSolveSchur_Private(F)); 336 mumps->id.ICNTL(26) = 2; /* expansion phase */ 337 PetscMUMPS_c(mumps); 338 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 339 /* restore defaults */ 340 mumps->id.ICNTL(26) = -1; 341 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 342 if (mumps->id.nrhs > 1) { 343 PetscCall(PetscFree(mumps->id.redrhs)); 344 mumps->id.lredrhs = 0; 345 mumps->sizeredrhs = 0; 346 } 347 } 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 /* 352 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 353 354 input: 355 A - matrix in aij,baij or sbaij format 356 shift - 0: C style output triple; 1: Fortran style output triple. 357 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 358 MAT_REUSE_MATRIX: only the values in v array are updated 359 output: 360 nnz - dim of r, c, and v (number of local nonzero entries of A) 361 r, c, v - row and col index, matrix values (matrix triples) 362 363 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 364 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 365 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 366 367 */ 368 369 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 370 { 371 const PetscScalar *av; 372 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 373 PetscInt64 nz, rnz, i, j, k; 374 PetscMUMPSInt *row, *col; 375 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 376 377 PetscFunctionBegin; 378 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 379 if (reuse == MAT_INITIAL_MATRIX) { 380 nz = aa->nz; 381 ai = aa->i; 382 aj = aa->j; 383 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 384 for (i = k = 0; i < M; i++) { 385 rnz = ai[i + 1] - ai[i]; 386 ajj = aj + ai[i]; 387 for (j = 0; j < rnz; j++) { 388 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 389 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 390 k++; 391 } 392 } 393 mumps->val = (PetscScalar *)av; 394 mumps->irn = row; 395 mumps->jcn = col; 396 mumps->nnz = nz; 397 } else PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); 398 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 399 PetscFunctionReturn(PETSC_SUCCESS); 400 } 401 402 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 403 { 404 PetscInt64 nz, i, j, k, r; 405 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 406 PetscMUMPSInt *row, *col; 407 408 PetscFunctionBegin; 409 nz = a->sliidx[a->totalslices]; 410 if (reuse == MAT_INITIAL_MATRIX) { 411 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 412 for (i = k = 0; i < a->totalslices; i++) { 413 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 414 } 415 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 416 mumps->irn = row; 417 mumps->jcn = col; 418 mumps->nnz = nz; 419 mumps->val = a->val; 420 } else PetscCall(PetscArraycpy(mumps->val, a->val, nz)); 421 PetscFunctionReturn(PETSC_SUCCESS); 422 } 423 424 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 425 { 426 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 427 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 428 PetscInt64 M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 429 PetscInt bs; 430 PetscMUMPSInt *row, *col; 431 432 PetscFunctionBegin; 433 if (reuse == MAT_INITIAL_MATRIX) { 434 PetscCall(MatGetBlockSize(A, &bs)); 435 M = A->rmap->N / bs; 436 ai = aa->i; 437 aj = aa->j; 438 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 439 for (i = 0; i < M; i++) { 440 ajj = aj + ai[i]; 441 rnz = ai[i + 1] - ai[i]; 442 for (k = 0; k < rnz; k++) { 443 for (j = 0; j < bs; j++) { 444 for (m = 0; m < bs; m++) { 445 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 446 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 447 idx++; 448 } 449 } 450 } 451 } 452 mumps->irn = row; 453 mumps->jcn = col; 454 mumps->nnz = nz; 455 mumps->val = aa->a; 456 } else PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); 457 PetscFunctionReturn(PETSC_SUCCESS); 458 } 459 460 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 461 { 462 const PetscInt *ai, *aj, *ajj; 463 PetscInt bs; 464 PetscInt64 nz, rnz, i, j, k, m; 465 PetscMUMPSInt *row, *col; 466 PetscScalar *val; 467 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 468 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 469 #if defined(PETSC_USE_COMPLEX) 470 PetscBool isset, hermitian; 471 #endif 472 473 PetscFunctionBegin; 474 #if defined(PETSC_USE_COMPLEX) 475 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 476 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 477 #endif 478 ai = aa->i; 479 aj = aa->j; 480 PetscCall(MatGetBlockSize(A, &bs)); 481 if (reuse == MAT_INITIAL_MATRIX) { 482 const PetscInt64 alloc_size = aa->nz * bs2; 483 484 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 485 if (bs > 1) { 486 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 487 mumps->val = mumps->val_alloc; 488 } else { 489 mumps->val = aa->a; 490 } 491 mumps->irn = row; 492 mumps->jcn = col; 493 } else { 494 row = mumps->irn; 495 col = mumps->jcn; 496 } 497 val = mumps->val; 498 499 nz = 0; 500 if (bs > 1) { 501 for (i = 0; i < mbs; i++) { 502 rnz = ai[i + 1] - ai[i]; 503 ajj = aj + ai[i]; 504 for (j = 0; j < rnz; j++) { 505 for (k = 0; k < bs; k++) { 506 for (m = 0; m < bs; m++) { 507 if (ajj[j] > i || k >= m) { 508 if (reuse == MAT_INITIAL_MATRIX) { 509 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 510 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 511 } 512 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 513 } 514 } 515 } 516 } 517 } 518 } else if (reuse == MAT_INITIAL_MATRIX) { 519 for (i = 0; i < mbs; i++) { 520 rnz = ai[i + 1] - ai[i]; 521 ajj = aj + ai[i]; 522 for (j = 0; j < rnz; j++) { 523 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 524 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 525 nz++; 526 } 527 } 528 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 529 } else PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX */ 530 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 531 PetscFunctionReturn(PETSC_SUCCESS); 532 } 533 534 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 535 { 536 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 537 PetscInt64 nz, rnz, i, j; 538 const PetscScalar *av, *v1; 539 PetscScalar *val; 540 PetscMUMPSInt *row, *col; 541 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 542 PetscBool missing; 543 #if defined(PETSC_USE_COMPLEX) 544 PetscBool hermitian, isset; 545 #endif 546 547 PetscFunctionBegin; 548 #if defined(PETSC_USE_COMPLEX) 549 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 550 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 551 #endif 552 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 553 ai = aa->i; 554 aj = aa->j; 555 adiag = aa->diag; 556 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 557 if (reuse == MAT_INITIAL_MATRIX) { 558 /* count nz in the upper triangular part of A */ 559 nz = 0; 560 if (missing) { 561 for (i = 0; i < M; i++) { 562 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 563 for (j = ai[i]; j < ai[i + 1]; j++) { 564 if (aj[j] < i) continue; 565 nz++; 566 } 567 } else { 568 nz += ai[i + 1] - adiag[i]; 569 } 570 } 571 } else { 572 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 573 } 574 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 575 PetscCall(PetscMalloc1(nz, &val)); 576 mumps->nnz = nz; 577 mumps->irn = row; 578 mumps->jcn = col; 579 mumps->val = mumps->val_alloc = val; 580 581 nz = 0; 582 if (missing) { 583 for (i = 0; i < M; i++) { 584 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 585 for (j = ai[i]; j < ai[i + 1]; j++) { 586 if (aj[j] < i) continue; 587 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 588 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 589 val[nz] = av[j]; 590 nz++; 591 } 592 } else { 593 rnz = ai[i + 1] - adiag[i]; 594 ajj = aj + adiag[i]; 595 v1 = av + adiag[i]; 596 for (j = 0; j < rnz; j++) { 597 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 598 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 599 val[nz++] = v1[j]; 600 } 601 } 602 } 603 } else { 604 for (i = 0; i < M; i++) { 605 rnz = ai[i + 1] - adiag[i]; 606 ajj = aj + adiag[i]; 607 v1 = av + adiag[i]; 608 for (j = 0; j < rnz; j++) { 609 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 610 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 611 val[nz++] = v1[j]; 612 } 613 } 614 } 615 } else { 616 nz = 0; 617 val = mumps->val; 618 if (missing) { 619 for (i = 0; i < M; i++) { 620 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 621 for (j = ai[i]; j < ai[i + 1]; j++) { 622 if (aj[j] < i) continue; 623 val[nz++] = av[j]; 624 } 625 } else { 626 rnz = ai[i + 1] - adiag[i]; 627 v1 = av + adiag[i]; 628 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 629 } 630 } 631 } else { 632 for (i = 0; i < M; i++) { 633 rnz = ai[i + 1] - adiag[i]; 634 v1 = av + adiag[i]; 635 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 636 } 637 } 638 } 639 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 640 PetscFunctionReturn(PETSC_SUCCESS); 641 } 642 643 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 644 { 645 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 646 PetscInt bs; 647 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 648 PetscMUMPSInt *row, *col; 649 const PetscScalar *av, *bv, *v1, *v2; 650 PetscScalar *val; 651 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 652 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 653 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 654 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 655 #if defined(PETSC_USE_COMPLEX) 656 PetscBool hermitian, isset; 657 #endif 658 659 PetscFunctionBegin; 660 #if defined(PETSC_USE_COMPLEX) 661 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 662 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 663 #endif 664 PetscCall(MatGetBlockSize(A, &bs)); 665 rstart = A->rmap->rstart; 666 ai = aa->i; 667 aj = aa->j; 668 bi = bb->i; 669 bj = bb->j; 670 av = aa->a; 671 bv = bb->a; 672 673 garray = mat->garray; 674 675 if (reuse == MAT_INITIAL_MATRIX) { 676 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 677 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 678 PetscCall(PetscMalloc1(nz, &val)); 679 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 680 mumps->irn = row; 681 mumps->jcn = col; 682 mumps->val = mumps->val_alloc = val; 683 } else { 684 val = mumps->val; 685 } 686 687 jj = 0; 688 irow = rstart; 689 for (i = 0; i < mbs; i++) { 690 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 691 countA = ai[i + 1] - ai[i]; 692 countB = bi[i + 1] - bi[i]; 693 bjj = bj + bi[i]; 694 v1 = av + ai[i] * bs2; 695 v2 = bv + bi[i] * bs2; 696 697 if (bs > 1) { 698 /* A-part */ 699 for (j = 0; j < countA; j++) { 700 for (k = 0; k < bs; k++) { 701 for (m = 0; m < bs; m++) { 702 if (rstart + ajj[j] * bs > irow || k >= m) { 703 if (reuse == MAT_INITIAL_MATRIX) { 704 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 705 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 706 } 707 val[jj++] = v1[j * bs2 + m + k * bs]; 708 } 709 } 710 } 711 } 712 713 /* B-part */ 714 for (j = 0; j < countB; j++) { 715 for (k = 0; k < bs; k++) { 716 for (m = 0; m < bs; m++) { 717 if (reuse == MAT_INITIAL_MATRIX) { 718 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 719 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 720 } 721 val[jj++] = v2[j * bs2 + m + k * bs]; 722 } 723 } 724 } 725 } else { 726 /* A-part */ 727 for (j = 0; j < countA; j++) { 728 if (reuse == MAT_INITIAL_MATRIX) { 729 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 730 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 731 } 732 val[jj++] = v1[j]; 733 } 734 735 /* B-part */ 736 for (j = 0; j < countB; j++) { 737 if (reuse == MAT_INITIAL_MATRIX) { 738 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 739 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 740 } 741 val[jj++] = v2[j]; 742 } 743 } 744 irow += bs; 745 } 746 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 747 PetscFunctionReturn(PETSC_SUCCESS); 748 } 749 750 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 751 { 752 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 753 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 754 PetscMUMPSInt *row, *col; 755 const PetscScalar *av, *bv, *v1, *v2; 756 PetscScalar *val; 757 Mat Ad, Ao; 758 Mat_SeqAIJ *aa; 759 Mat_SeqAIJ *bb; 760 761 PetscFunctionBegin; 762 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 763 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 764 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 765 766 aa = (Mat_SeqAIJ *)(Ad)->data; 767 bb = (Mat_SeqAIJ *)(Ao)->data; 768 ai = aa->i; 769 aj = aa->j; 770 bi = bb->i; 771 bj = bb->j; 772 773 rstart = A->rmap->rstart; 774 cstart = A->cmap->rstart; 775 776 if (reuse == MAT_INITIAL_MATRIX) { 777 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 778 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 779 PetscCall(PetscMalloc1(nz, &val)); 780 mumps->nnz = nz; 781 mumps->irn = row; 782 mumps->jcn = col; 783 mumps->val = mumps->val_alloc = val; 784 } else { 785 val = mumps->val; 786 } 787 788 jj = 0; 789 irow = rstart; 790 for (i = 0; i < m; i++) { 791 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 792 countA = ai[i + 1] - ai[i]; 793 countB = bi[i + 1] - bi[i]; 794 bjj = bj + bi[i]; 795 v1 = av + ai[i]; 796 v2 = bv + bi[i]; 797 798 /* A-part */ 799 for (j = 0; j < countA; j++) { 800 if (reuse == MAT_INITIAL_MATRIX) { 801 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 802 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 803 } 804 val[jj++] = v1[j]; 805 } 806 807 /* B-part */ 808 for (j = 0; j < countB; j++) { 809 if (reuse == MAT_INITIAL_MATRIX) { 810 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 811 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 812 } 813 val[jj++] = v2[j]; 814 } 815 irow++; 816 } 817 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 818 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 819 PetscFunctionReturn(PETSC_SUCCESS); 820 } 821 822 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 823 { 824 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 825 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 826 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 827 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 828 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 829 const PetscInt bs2 = mat->bs2; 830 PetscInt bs; 831 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 832 PetscMUMPSInt *row, *col; 833 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 834 PetscScalar *val; 835 836 PetscFunctionBegin; 837 PetscCall(MatGetBlockSize(A, &bs)); 838 if (reuse == MAT_INITIAL_MATRIX) { 839 nz = bs2 * (aa->nz + bb->nz); 840 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 841 PetscCall(PetscMalloc1(nz, &val)); 842 mumps->nnz = nz; 843 mumps->irn = row; 844 mumps->jcn = col; 845 mumps->val = mumps->val_alloc = val; 846 } else { 847 val = mumps->val; 848 } 849 850 jj = 0; 851 irow = rstart; 852 for (i = 0; i < mbs; i++) { 853 countA = ai[i + 1] - ai[i]; 854 countB = bi[i + 1] - bi[i]; 855 ajj = aj + ai[i]; 856 bjj = bj + bi[i]; 857 v1 = av + bs2 * ai[i]; 858 v2 = bv + bs2 * bi[i]; 859 860 idx = 0; 861 /* A-part */ 862 for (k = 0; k < countA; k++) { 863 for (j = 0; j < bs; j++) { 864 for (n = 0; n < bs; n++) { 865 if (reuse == MAT_INITIAL_MATRIX) { 866 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 867 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 868 } 869 val[jj++] = v1[idx++]; 870 } 871 } 872 } 873 874 idx = 0; 875 /* B-part */ 876 for (k = 0; k < countB; k++) { 877 for (j = 0; j < bs; j++) { 878 for (n = 0; n < bs; n++) { 879 if (reuse == MAT_INITIAL_MATRIX) { 880 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 881 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 882 } 883 val[jj++] = v2[idx++]; 884 } 885 } 886 } 887 irow += bs; 888 } 889 PetscFunctionReturn(PETSC_SUCCESS); 890 } 891 892 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 893 { 894 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 895 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 896 PetscMUMPSInt *row, *col; 897 const PetscScalar *av, *bv, *v1, *v2; 898 PetscScalar *val; 899 Mat Ad, Ao; 900 Mat_SeqAIJ *aa; 901 Mat_SeqAIJ *bb; 902 #if defined(PETSC_USE_COMPLEX) 903 PetscBool hermitian, isset; 904 #endif 905 906 PetscFunctionBegin; 907 #if defined(PETSC_USE_COMPLEX) 908 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 909 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 910 #endif 911 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 912 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 913 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 914 915 aa = (Mat_SeqAIJ *)(Ad)->data; 916 bb = (Mat_SeqAIJ *)(Ao)->data; 917 ai = aa->i; 918 aj = aa->j; 919 adiag = aa->diag; 920 bi = bb->i; 921 bj = bb->j; 922 923 rstart = A->rmap->rstart; 924 925 if (reuse == MAT_INITIAL_MATRIX) { 926 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 927 nzb = 0; /* num of upper triangular entries in mat->B */ 928 for (i = 0; i < m; i++) { 929 nza += (ai[i + 1] - adiag[i]); 930 countB = bi[i + 1] - bi[i]; 931 bjj = bj + bi[i]; 932 for (j = 0; j < countB; j++) { 933 if (garray[bjj[j]] > rstart) nzb++; 934 } 935 } 936 937 nz = nza + nzb; /* total nz of upper triangular part of mat */ 938 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 939 PetscCall(PetscMalloc1(nz, &val)); 940 mumps->nnz = nz; 941 mumps->irn = row; 942 mumps->jcn = col; 943 mumps->val = mumps->val_alloc = val; 944 } else { 945 val = mumps->val; 946 } 947 948 jj = 0; 949 irow = rstart; 950 for (i = 0; i < m; i++) { 951 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 952 v1 = av + adiag[i]; 953 countA = ai[i + 1] - adiag[i]; 954 countB = bi[i + 1] - bi[i]; 955 bjj = bj + bi[i]; 956 v2 = bv + bi[i]; 957 958 /* A-part */ 959 for (j = 0; j < countA; j++) { 960 if (reuse == MAT_INITIAL_MATRIX) { 961 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 962 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 963 } 964 val[jj++] = v1[j]; 965 } 966 967 /* B-part */ 968 for (j = 0; j < countB; j++) { 969 if (garray[bjj[j]] > rstart) { 970 if (reuse == MAT_INITIAL_MATRIX) { 971 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 972 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 973 } 974 val[jj++] = v2[j]; 975 } 976 } 977 irow++; 978 } 979 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 980 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 981 PetscFunctionReturn(PETSC_SUCCESS); 982 } 983 984 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 985 { 986 const PetscScalar *av; 987 const PetscInt M = A->rmap->n; 988 PetscInt64 i; 989 PetscMUMPSInt *row, *col; 990 Vec v; 991 992 PetscFunctionBegin; 993 PetscCall(MatDiagonalGetDiagonal(A, &v)); 994 PetscCall(VecGetArrayRead(v, &av)); 995 if (reuse == MAT_INITIAL_MATRIX) { 996 PetscCall(PetscMalloc2(M, &row, M, &col)); 997 for (i = 0; i < M; i++) { 998 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 999 col[i] = row[i]; 1000 } 1001 mumps->val = (PetscScalar *)av; 1002 mumps->irn = row; 1003 mumps->jcn = col; 1004 mumps->nnz = M; 1005 } else PetscCall(PetscArraycpy(mumps->val, av, M)); 1006 PetscCall(VecRestoreArrayRead(v, &av)); 1007 PetscFunctionReturn(PETSC_SUCCESS); 1008 } 1009 1010 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1011 { 1012 Mat **mats; 1013 PetscInt nr, nc; 1014 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 1015 1016 PetscFunctionBegin; 1017 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1018 if (reuse == MAT_INITIAL_MATRIX) { 1019 PetscMUMPSInt *irns, *jcns; 1020 PetscScalar *vals; 1021 PetscInt64 totnnz, cumnnz, maxnnz; 1022 PetscInt *pjcns_w; 1023 IS *rows, *cols; 1024 PetscInt **rows_idx, **cols_idx; 1025 1026 cumnnz = 0; 1027 maxnnz = 0; 1028 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1029 for (PetscInt r = 0; r < nr; r++) { 1030 for (PetscInt c = 0; c < nc; c++) { 1031 Mat sub = mats[r][c]; 1032 1033 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1034 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1035 if (sub) { 1036 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1037 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag; 1038 MatInfo info; 1039 1040 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1041 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1042 else { 1043 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1044 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1045 } 1046 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1047 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1048 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1049 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1050 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1051 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1052 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1053 1054 if (chol) { 1055 if (r == c) { 1056 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1057 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1058 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1059 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1060 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1061 } else { 1062 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1063 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1064 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1065 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1066 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1067 } 1068 } else { 1069 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1070 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1071 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1072 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1073 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1074 } 1075 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1076 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1077 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1078 cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 1079 maxnnz = PetscMax(maxnnz, info.nz_used); 1080 } 1081 } 1082 } 1083 1084 /* Allocate total COO */ 1085 totnnz = cumnnz; 1086 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1087 PetscCall(PetscMalloc1(totnnz, &vals)); 1088 1089 /* Handle rows and column maps 1090 We directly map rows and use an SF for the columns */ 1091 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1092 PetscCall(MatNestGetISs(A, rows, cols)); 1093 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1094 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1095 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1096 else (void)maxnnz; 1097 1098 cumnnz = 0; 1099 for (PetscInt r = 0; r < nr; r++) { 1100 for (PetscInt c = 0; c < nc; c++) { 1101 Mat sub = mats[r][c]; 1102 const PetscInt *ridx = rows_idx[r]; 1103 const PetscInt *cidx = cols_idx[c]; 1104 PetscInt rst; 1105 PetscSF csf; 1106 PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 1107 PetscLayout cmap; 1108 1109 mumps->nest_vals_start[r * nc + c] = cumnnz; 1110 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1111 1112 /* Extract inner blocks if needed */ 1113 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1114 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1115 else { 1116 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1117 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1118 } 1119 swap = (PetscBool)(isTrans || isHTrans); 1120 1121 /* Get column layout to map off-process columns */ 1122 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1123 1124 /* Get row start to map on-process rows */ 1125 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1126 1127 /* Directly use the mumps datastructure and use C ordering for now */ 1128 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1129 1130 /* Swap the role of rows and columns indices for transposed blocks 1131 since we need values with global final ordering */ 1132 if (swap) { 1133 cidx = rows_idx[r]; 1134 ridx = cols_idx[c]; 1135 } 1136 1137 /* Communicate column indices 1138 This could have been done with a single SF but it would have complicated the code a lot. 1139 But since we do it only once, we pay the price of setting up an SF for each block */ 1140 if (PetscDefined(USE_64BIT_INDICES)) { 1141 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1142 } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 1143 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1144 PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1145 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1146 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1147 PetscCall(PetscSFDestroy(&csf)); 1148 1149 /* Import indices: use direct map for rows and mapped indices for columns */ 1150 if (swap) { 1151 for (PetscInt k = 0; k < mumps->nnz; k++) { 1152 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1153 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1154 } 1155 } else { 1156 for (PetscInt k = 0; k < mumps->nnz; k++) { 1157 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1158 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1159 } 1160 } 1161 1162 /* Import values to full COO */ 1163 PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1164 if (isHTrans) { /* conjugate the entries */ 1165 PetscScalar *v = vals + cumnnz; 1166 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]); 1167 } 1168 1169 /* Shift new starting point and sanity check */ 1170 cumnnz += mumps->nnz; 1171 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1172 1173 /* Free scratch memory */ 1174 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1175 PetscCall(PetscFree(mumps->val_alloc)); 1176 mumps->val = NULL; 1177 mumps->nnz = 0; 1178 } 1179 } 1180 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1181 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1182 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1183 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1184 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1185 mumps->nest_vals_start[nr * nc] = cumnnz; 1186 1187 /* Set pointers for final MUMPS data structure */ 1188 mumps->nest_vals = vals; 1189 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1190 mumps->val = vals; 1191 mumps->irn = irns; 1192 mumps->jcn = jcns; 1193 mumps->nnz = cumnnz; 1194 } else { 1195 PetscScalar *oval = mumps->nest_vals; 1196 for (PetscInt r = 0; r < nr; r++) { 1197 for (PetscInt c = 0; c < nc; c++) { 1198 PetscBool isTrans, isHTrans = PETSC_FALSE; 1199 Mat sub = mats[r][c]; 1200 PetscInt midx = r * nc + c; 1201 1202 if (!mumps->nest_convert_to_triples[midx]) continue; 1203 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1204 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1205 else { 1206 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1207 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1208 } 1209 mumps->val = oval + mumps->nest_vals_start[midx]; 1210 PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1211 if (isHTrans) { 1212 PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1213 for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1214 } 1215 } 1216 } 1217 mumps->val = oval; 1218 } 1219 PetscFunctionReturn(PETSC_SUCCESS); 1220 } 1221 1222 static PetscErrorCode MatDestroy_MUMPS(Mat A) 1223 { 1224 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1225 1226 PetscFunctionBegin; 1227 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1228 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1229 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1230 PetscCall(VecDestroy(&mumps->b_seq)); 1231 PetscCall(VecDestroy(&mumps->x_seq)); 1232 PetscCall(PetscFree(mumps->id.perm_in)); 1233 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1234 PetscCall(PetscFree(mumps->val_alloc)); 1235 PetscCall(PetscFree(mumps->info)); 1236 PetscCall(PetscFree(mumps->ICNTL_pre)); 1237 PetscCall(PetscFree(mumps->CNTL_pre)); 1238 PetscCall(MatMumpsResetSchur_Private(mumps)); 1239 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1240 mumps->id.job = JOB_END; 1241 PetscMUMPS_c(mumps); 1242 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1243 if (mumps->mumps_comm != MPI_COMM_NULL) { 1244 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1245 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1246 } 1247 } 1248 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1249 if (mumps->use_petsc_omp_support) { 1250 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1251 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1252 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1253 } 1254 #endif 1255 PetscCall(PetscFree(mumps->ia_alloc)); 1256 PetscCall(PetscFree(mumps->ja_alloc)); 1257 PetscCall(PetscFree(mumps->recvcount)); 1258 PetscCall(PetscFree(mumps->reqs)); 1259 PetscCall(PetscFree(mumps->irhs_loc)); 1260 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1261 PetscCall(PetscFree(mumps->nest_vals)); 1262 PetscCall(PetscFree(A->data)); 1263 1264 /* clear composed functions */ 1265 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1266 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1267 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1268 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1269 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1270 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1271 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1272 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1273 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1274 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1275 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1276 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1277 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1278 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1279 PetscFunctionReturn(PETSC_SUCCESS); 1280 } 1281 1282 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1283 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1284 { 1285 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1286 const PetscMPIInt ompsize = mumps->omp_comm_size; 1287 PetscInt i, m, M, rstart; 1288 1289 PetscFunctionBegin; 1290 PetscCall(MatGetSize(A, &M, NULL)); 1291 PetscCall(MatGetLocalSize(A, &m, NULL)); 1292 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1293 if (ompsize == 1) { 1294 if (!mumps->irhs_loc) { 1295 mumps->nloc_rhs = m; 1296 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1297 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1298 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1299 } 1300 mumps->id.rhs_loc = (MumpsScalar *)array; 1301 } else { 1302 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1303 const PetscInt *ranges; 1304 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1305 MPI_Group petsc_group, omp_group; 1306 PetscScalar *recvbuf = NULL; 1307 1308 if (mumps->is_omp_master) { 1309 /* Lazily initialize the omp stuff for distributed rhs */ 1310 if (!mumps->irhs_loc) { 1311 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1312 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1313 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1314 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1315 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1316 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1317 1318 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1319 mumps->nloc_rhs = 0; 1320 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1321 for (j = 0; j < ompsize; j++) { 1322 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1323 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1324 } 1325 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1326 for (j = k = 0; j < ompsize; j++) { 1327 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1328 } 1329 1330 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1331 PetscCallMPI(MPI_Group_free(&petsc_group)); 1332 PetscCallMPI(MPI_Group_free(&omp_group)); 1333 } 1334 1335 /* Realloc buffers when current nrhs is bigger than what we have met */ 1336 if (nrhs > mumps->max_nrhs) { 1337 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1338 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1339 mumps->max_nrhs = nrhs; 1340 } 1341 1342 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1343 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1344 mumps->rhs_disps[0] = 0; 1345 for (j = 1; j < ompsize; j++) { 1346 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1347 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1348 } 1349 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1350 } 1351 1352 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1353 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1354 1355 if (mumps->is_omp_master) { 1356 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1357 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1358 for (j = 0; j < ompsize; j++) { 1359 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1360 dst = dstbase; 1361 for (i = 0; i < nrhs; i++) { 1362 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1363 src += mumps->rhs_nrow[j]; 1364 dst += mumps->nloc_rhs; 1365 } 1366 dstbase += mumps->rhs_nrow[j]; 1367 } 1368 } 1369 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1370 } 1371 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1372 } 1373 mumps->id.nrhs = nrhs; 1374 mumps->id.nloc_rhs = mumps->nloc_rhs; 1375 mumps->id.lrhs_loc = mumps->nloc_rhs; 1376 mumps->id.irhs_loc = mumps->irhs_loc; 1377 PetscFunctionReturn(PETSC_SUCCESS); 1378 } 1379 1380 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1381 { 1382 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1383 const PetscScalar *rarray = NULL; 1384 PetscScalar *array; 1385 IS is_iden, is_petsc; 1386 PetscInt i; 1387 PetscBool second_solve = PETSC_FALSE; 1388 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1389 1390 PetscFunctionBegin; 1391 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1392 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1393 &cite1)); 1394 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1395 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1396 &cite2)); 1397 1398 if (A->factorerrortype) { 1399 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1400 PetscCall(VecSetInf(x)); 1401 PetscFunctionReturn(PETSC_SUCCESS); 1402 } 1403 1404 mumps->id.nrhs = 1; 1405 if (mumps->petsc_size > 1) { 1406 if (mumps->ICNTL20 == 10) { 1407 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1408 PetscCall(VecGetArrayRead(b, &rarray)); 1409 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1410 } else { 1411 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1412 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1413 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1414 if (!mumps->myid) { 1415 PetscCall(VecGetArray(mumps->b_seq, &array)); 1416 mumps->id.rhs = (MumpsScalar *)array; 1417 } 1418 } 1419 } else { /* petsc_size == 1 */ 1420 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1421 PetscCall(VecCopy(b, x)); 1422 PetscCall(VecGetArray(x, &array)); 1423 mumps->id.rhs = (MumpsScalar *)array; 1424 } 1425 1426 /* 1427 handle condensation step of Schur complement (if any) 1428 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1429 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1430 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1431 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1432 */ 1433 if (mumps->id.size_schur > 0) { 1434 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1435 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1436 second_solve = PETSC_TRUE; 1437 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1438 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1439 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1440 } 1441 /* solve phase */ 1442 mumps->id.job = JOB_SOLVE; 1443 PetscMUMPS_c(mumps); 1444 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1445 1446 /* handle expansion step of Schur complement (if any) */ 1447 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1448 else if (mumps->id.ICNTL(26) == 1) { 1449 PetscCall(MatMumpsSolveSchur_Private(A)); 1450 for (i = 0; i < mumps->id.size_schur; ++i) { 1451 #if !defined(PETSC_USE_COMPLEX) 1452 PetscScalar val = mumps->id.redrhs[i]; 1453 #else 1454 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1455 #endif 1456 array[mumps->id.listvar_schur[i] - 1] = val; 1457 } 1458 } 1459 1460 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1461 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1462 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1463 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1464 } 1465 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1466 PetscInt *isol2_loc = NULL; 1467 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1468 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1469 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1470 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1471 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1472 PetscCall(ISDestroy(&is_iden)); 1473 PetscCall(ISDestroy(&is_petsc)); 1474 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1475 } 1476 1477 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1478 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1479 } 1480 1481 if (mumps->petsc_size > 1) { 1482 if (mumps->ICNTL20 == 10) { 1483 PetscCall(VecRestoreArrayRead(b, &rarray)); 1484 } else if (!mumps->myid) { 1485 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1486 } 1487 } else PetscCall(VecRestoreArray(x, &array)); 1488 1489 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1490 PetscFunctionReturn(PETSC_SUCCESS); 1491 } 1492 1493 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1494 { 1495 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1496 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1497 1498 PetscFunctionBegin; 1499 mumps->id.ICNTL(9) = 0; 1500 PetscCall(MatSolve_MUMPS(A, b, x)); 1501 mumps->id.ICNTL(9) = value; 1502 PetscFunctionReturn(PETSC_SUCCESS); 1503 } 1504 1505 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1506 { 1507 Mat Bt = NULL; 1508 PetscBool denseX, denseB, flg, flgT; 1509 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1510 PetscInt i, nrhs, M; 1511 PetscScalar *array; 1512 const PetscScalar *rbray; 1513 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1514 PetscMUMPSInt *isol_loc, *isol_loc_save; 1515 PetscScalar *bray, *sol_loc, *sol_loc_save; 1516 IS is_to, is_from; 1517 PetscInt k, proc, j, m, myrstart; 1518 const PetscInt *rstart; 1519 Vec v_mpi, msol_loc; 1520 VecScatter scat_sol; 1521 Vec b_seq; 1522 VecScatter scat_rhs; 1523 PetscScalar *aa; 1524 PetscInt spnr, *ia, *ja; 1525 Mat_MPIAIJ *b = NULL; 1526 1527 PetscFunctionBegin; 1528 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1529 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1530 1531 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1532 if (denseB) { 1533 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1534 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1535 } else { /* sparse B */ 1536 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1537 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1538 if (flgT) { /* input B is transpose of actual RHS matrix, 1539 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1540 PetscCall(MatTransposeGetMat(B, &Bt)); 1541 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1542 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1543 } 1544 1545 PetscCall(MatGetSize(B, &M, &nrhs)); 1546 mumps->id.nrhs = nrhs; 1547 mumps->id.lrhs = M; 1548 mumps->id.rhs = NULL; 1549 1550 if (mumps->petsc_size == 1) { 1551 PetscScalar *aa; 1552 PetscInt spnr, *ia, *ja; 1553 PetscBool second_solve = PETSC_FALSE; 1554 1555 PetscCall(MatDenseGetArray(X, &array)); 1556 mumps->id.rhs = (MumpsScalar *)array; 1557 1558 if (denseB) { 1559 /* copy B to X */ 1560 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1561 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1562 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1563 } else { /* sparse B */ 1564 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1565 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1566 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1567 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1568 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1569 } 1570 /* handle condensation step of Schur complement (if any) */ 1571 if (mumps->id.size_schur > 0) { 1572 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1573 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1574 second_solve = PETSC_TRUE; 1575 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1576 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1577 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1578 } 1579 /* solve phase */ 1580 mumps->id.job = JOB_SOLVE; 1581 PetscMUMPS_c(mumps); 1582 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1583 1584 /* handle expansion step of Schur complement (if any) */ 1585 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1586 else if (mumps->id.ICNTL(26) == 1) { 1587 PetscCall(MatMumpsSolveSchur_Private(A)); 1588 for (j = 0; j < nrhs; ++j) 1589 for (i = 0; i < mumps->id.size_schur; ++i) { 1590 #if !defined(PETSC_USE_COMPLEX) 1591 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1592 #else 1593 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1594 #endif 1595 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1596 } 1597 } 1598 if (!denseB) { /* sparse B */ 1599 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1600 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1601 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1602 } 1603 PetscCall(MatDenseRestoreArray(X, &array)); 1604 PetscFunctionReturn(PETSC_SUCCESS); 1605 } 1606 1607 /* parallel case: MUMPS requires rhs B to be centralized on the host! */ 1608 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1609 1610 /* create msol_loc to hold mumps local solution */ 1611 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1612 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1613 1614 lsol_loc = mumps->id.lsol_loc; 1615 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1616 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1617 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1618 mumps->id.isol_loc = isol_loc; 1619 1620 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1621 1622 if (denseB) { 1623 if (mumps->ICNTL20 == 10) { 1624 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1625 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1626 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1627 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1628 PetscCall(MatGetLocalSize(B, &m, NULL)); 1629 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1630 } else { 1631 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1632 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1633 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1634 0, re-arrange B into desired order, which is a local operation. 1635 */ 1636 1637 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1638 /* wrap dense rhs matrix B into a vector v_mpi */ 1639 PetscCall(MatGetLocalSize(B, &m, NULL)); 1640 PetscCall(MatDenseGetArray(B, &bray)); 1641 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1642 PetscCall(MatDenseRestoreArray(B, &bray)); 1643 1644 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1645 if (!mumps->myid) { 1646 PetscInt *idx; 1647 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1648 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1649 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1650 k = 0; 1651 for (proc = 0; proc < mumps->petsc_size; proc++) { 1652 for (j = 0; j < nrhs; j++) { 1653 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1654 } 1655 } 1656 1657 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1658 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1659 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1660 } else { 1661 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1662 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1663 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1664 } 1665 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1666 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1667 PetscCall(ISDestroy(&is_to)); 1668 PetscCall(ISDestroy(&is_from)); 1669 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1670 1671 if (!mumps->myid) { /* define rhs on the host */ 1672 PetscCall(VecGetArray(b_seq, &bray)); 1673 mumps->id.rhs = (MumpsScalar *)bray; 1674 PetscCall(VecRestoreArray(b_seq, &bray)); 1675 } 1676 } 1677 } else { /* sparse B */ 1678 b = (Mat_MPIAIJ *)Bt->data; 1679 1680 /* wrap dense X into a vector v_mpi */ 1681 PetscCall(MatGetLocalSize(X, &m, NULL)); 1682 PetscCall(MatDenseGetArray(X, &bray)); 1683 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1684 PetscCall(MatDenseRestoreArray(X, &bray)); 1685 1686 if (!mumps->myid) { 1687 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1688 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1689 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1690 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1691 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1692 } else { 1693 mumps->id.irhs_ptr = NULL; 1694 mumps->id.irhs_sparse = NULL; 1695 mumps->id.nz_rhs = 0; 1696 mumps->id.rhs_sparse = NULL; 1697 } 1698 } 1699 1700 /* solve phase */ 1701 mumps->id.job = JOB_SOLVE; 1702 PetscMUMPS_c(mumps); 1703 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1704 1705 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1706 PetscCall(MatDenseGetArray(X, &array)); 1707 PetscCall(VecPlaceArray(v_mpi, array)); 1708 1709 /* create scatter scat_sol */ 1710 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1711 /* iidx: index for scatter mumps solution to petsc X */ 1712 1713 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1714 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1715 for (i = 0; i < lsol_loc; i++) { 1716 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1717 1718 for (proc = 0; proc < mumps->petsc_size; proc++) { 1719 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1720 myrstart = rstart[proc]; 1721 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1722 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1723 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1724 break; 1725 } 1726 } 1727 1728 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1729 } 1730 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1731 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1732 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1733 PetscCall(ISDestroy(&is_from)); 1734 PetscCall(ISDestroy(&is_to)); 1735 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1736 PetscCall(MatDenseRestoreArray(X, &array)); 1737 1738 /* free spaces */ 1739 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1740 mumps->id.isol_loc = isol_loc_save; 1741 1742 PetscCall(PetscFree2(sol_loc, isol_loc)); 1743 PetscCall(PetscFree(idxx)); 1744 PetscCall(VecDestroy(&msol_loc)); 1745 PetscCall(VecDestroy(&v_mpi)); 1746 if (!denseB) { 1747 if (!mumps->myid) { 1748 b = (Mat_MPIAIJ *)Bt->data; 1749 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1750 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1751 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1752 } 1753 } else { 1754 if (mumps->ICNTL20 == 0) { 1755 PetscCall(VecDestroy(&b_seq)); 1756 PetscCall(VecScatterDestroy(&scat_rhs)); 1757 } 1758 } 1759 PetscCall(VecScatterDestroy(&scat_sol)); 1760 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1761 PetscFunctionReturn(PETSC_SUCCESS); 1762 } 1763 1764 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1765 { 1766 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1767 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1768 1769 PetscFunctionBegin; 1770 mumps->id.ICNTL(9) = 0; 1771 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1772 mumps->id.ICNTL(9) = value; 1773 PetscFunctionReturn(PETSC_SUCCESS); 1774 } 1775 1776 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1777 { 1778 PetscBool flg; 1779 Mat B; 1780 1781 PetscFunctionBegin; 1782 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1783 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1784 1785 /* Create B=Bt^T that uses Bt's data structure */ 1786 PetscCall(MatCreateTranspose(Bt, &B)); 1787 1788 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1789 PetscCall(MatDestroy(&B)); 1790 PetscFunctionReturn(PETSC_SUCCESS); 1791 } 1792 1793 #if !defined(PETSC_USE_COMPLEX) 1794 /* 1795 input: 1796 F: numeric factor 1797 output: 1798 nneg: total number of negative pivots 1799 nzero: total number of zero pivots 1800 npos: (global dimension of F) - nneg - nzero 1801 */ 1802 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1803 { 1804 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1805 PetscMPIInt size; 1806 1807 PetscFunctionBegin; 1808 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1809 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1810 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1811 1812 if (nneg) *nneg = mumps->id.INFOG(12); 1813 if (nzero || npos) { 1814 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1815 if (nzero) *nzero = mumps->id.INFOG(28); 1816 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1817 } 1818 PetscFunctionReturn(PETSC_SUCCESS); 1819 } 1820 #endif 1821 1822 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1823 { 1824 PetscInt i, nreqs; 1825 PetscMUMPSInt *irn, *jcn; 1826 PetscMPIInt count; 1827 PetscInt64 totnnz, remain; 1828 const PetscInt osize = mumps->omp_comm_size; 1829 PetscScalar *val; 1830 1831 PetscFunctionBegin; 1832 if (osize > 1) { 1833 if (reuse == MAT_INITIAL_MATRIX) { 1834 /* master first gathers counts of nonzeros to receive */ 1835 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1836 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1837 1838 /* Then each computes number of send/recvs */ 1839 if (mumps->is_omp_master) { 1840 /* Start from 1 since self communication is not done in MPI */ 1841 nreqs = 0; 1842 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1843 } else { 1844 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1845 } 1846 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1847 1848 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1849 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1850 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1851 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1852 */ 1853 nreqs = 0; /* counter for actual send/recvs */ 1854 if (mumps->is_omp_master) { 1855 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1856 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1857 PetscCall(PetscMalloc1(totnnz, &val)); 1858 1859 /* Self communication */ 1860 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1861 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1862 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1863 1864 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1865 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1866 PetscCall(PetscFree(mumps->val_alloc)); 1867 mumps->nnz = totnnz; 1868 mumps->irn = irn; 1869 mumps->jcn = jcn; 1870 mumps->val = mumps->val_alloc = val; 1871 1872 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1873 jcn += mumps->recvcount[0]; 1874 val += mumps->recvcount[0]; 1875 1876 /* Remote communication */ 1877 for (i = 1; i < osize; i++) { 1878 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1879 remain = mumps->recvcount[i] - count; 1880 while (count > 0) { 1881 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1882 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1883 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1884 irn += count; 1885 jcn += count; 1886 val += count; 1887 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1888 remain -= count; 1889 } 1890 } 1891 } else { 1892 irn = mumps->irn; 1893 jcn = mumps->jcn; 1894 val = mumps->val; 1895 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1896 remain = mumps->nnz - count; 1897 while (count > 0) { 1898 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1899 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1900 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1901 irn += count; 1902 jcn += count; 1903 val += count; 1904 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1905 remain -= count; 1906 } 1907 } 1908 } else { 1909 nreqs = 0; 1910 if (mumps->is_omp_master) { 1911 val = mumps->val + mumps->recvcount[0]; 1912 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1913 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1914 remain = mumps->recvcount[i] - count; 1915 while (count > 0) { 1916 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1917 val += count; 1918 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1919 remain -= count; 1920 } 1921 } 1922 } else { 1923 val = mumps->val; 1924 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1925 remain = mumps->nnz - count; 1926 while (count > 0) { 1927 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1928 val += count; 1929 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1930 remain -= count; 1931 } 1932 } 1933 } 1934 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1935 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1936 } 1937 PetscFunctionReturn(PETSC_SUCCESS); 1938 } 1939 1940 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info) 1941 { 1942 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1943 PetscBool isMPIAIJ; 1944 1945 PetscFunctionBegin; 1946 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1947 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1948 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1949 PetscFunctionReturn(PETSC_SUCCESS); 1950 } 1951 1952 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1953 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1954 1955 /* numerical factorization phase */ 1956 mumps->id.job = JOB_FACTNUMERIC; 1957 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1958 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 1959 } else { 1960 mumps->id.a_loc = (MumpsScalar *)mumps->val; 1961 } 1962 PetscMUMPS_c(mumps); 1963 if (mumps->id.INFOG(1) < 0) { 1964 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 1965 if (mumps->id.INFOG(1) == -10) { 1966 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1967 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1968 } else if (mumps->id.INFOG(1) == -13) { 1969 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1970 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1971 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1972 PetscCall(PetscInfo(F, "MUMPS error in numerical factorizatione: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1973 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1974 } else { 1975 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1976 F->factorerrortype = MAT_FACTOR_OTHER; 1977 } 1978 } 1979 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 1980 1981 F->assembled = PETSC_TRUE; 1982 1983 if (F->schur) { /* reset Schur status to unfactored */ 1984 #if defined(PETSC_HAVE_CUDA) 1985 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1986 #endif 1987 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1988 mumps->id.ICNTL(19) = 2; 1989 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1990 } 1991 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1992 } 1993 1994 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1995 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1996 1997 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1998 if (mumps->petsc_size > 1) { 1999 PetscInt lsol_loc; 2000 PetscScalar *sol_loc; 2001 2002 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 2003 2004 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2005 if (mumps->x_seq) { 2006 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2007 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2008 PetscCall(VecDestroy(&mumps->x_seq)); 2009 } 2010 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2011 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2012 mumps->id.lsol_loc = lsol_loc; 2013 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2014 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2015 } 2016 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 2017 PetscFunctionReturn(PETSC_SUCCESS); 2018 } 2019 2020 /* Sets MUMPS options from the options database */ 2021 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2022 { 2023 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2024 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2025 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2026 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2027 MumpsScalar *arr; 2028 2029 PetscFunctionBegin; 2030 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2031 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2032 PetscInt nthreads = 0; 2033 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2034 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2035 2036 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2037 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2038 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2039 2040 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2041 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2042 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2043 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2044 if (mumps->use_petsc_omp_support) { 2045 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2046 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2047 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2048 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2049 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2050 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2051 #endif 2052 } else { 2053 mumps->omp_comm = PETSC_COMM_SELF; 2054 mumps->mumps_comm = mumps->petsc_comm; 2055 mumps->is_omp_master = PETSC_TRUE; 2056 } 2057 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2058 mumps->reqs = NULL; 2059 mumps->tag = 0; 2060 2061 if (mumps->mumps_comm != MPI_COMM_NULL) { 2062 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2063 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2064 MPI_Comm comm; 2065 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2066 mumps->mumps_comm = comm; 2067 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2068 } 2069 2070 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2071 mumps->id.job = JOB_INIT; 2072 mumps->id.par = 1; /* host participates factorizaton and solve */ 2073 mumps->id.sym = mumps->sym; 2074 2075 size = mumps->id.size_schur; 2076 arr = mumps->id.schur; 2077 listvar_schur = mumps->id.listvar_schur; 2078 PetscMUMPS_c(mumps); 2079 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2080 /* restore cached ICNTL and CNTL values */ 2081 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2082 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2083 PetscCall(PetscFree(mumps->ICNTL_pre)); 2084 PetscCall(PetscFree(mumps->CNTL_pre)); 2085 2086 if (schur) { 2087 mumps->id.size_schur = size; 2088 mumps->id.schur_lld = size; 2089 mumps->id.schur = arr; 2090 mumps->id.listvar_schur = listvar_schur; 2091 if (mumps->petsc_size > 1) { 2092 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2093 2094 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2095 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2096 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2097 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2098 } else { 2099 if (F->factortype == MAT_FACTOR_LU) { 2100 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2101 } else { 2102 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2103 } 2104 } 2105 mumps->id.ICNTL(26) = -1; 2106 } 2107 2108 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2109 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 2110 */ 2111 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2112 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2113 2114 mumps->scat_rhs = NULL; 2115 mumps->scat_sol = NULL; 2116 2117 /* set PETSc-MUMPS default options - override MUMPS default */ 2118 mumps->id.ICNTL(3) = 0; 2119 mumps->id.ICNTL(4) = 0; 2120 if (mumps->petsc_size == 1) { 2121 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2122 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2123 } else { 2124 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2125 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2126 } 2127 } 2128 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2129 if (flg) mumps->id.ICNTL(1) = icntl; 2130 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2131 if (flg) mumps->id.ICNTL(2) = icntl; 2132 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2133 if (flg) mumps->id.ICNTL(3) = icntl; 2134 2135 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2136 if (flg) mumps->id.ICNTL(4) = icntl; 2137 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2138 2139 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2140 if (flg) mumps->id.ICNTL(6) = icntl; 2141 2142 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2143 if (flg) { 2144 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2145 mumps->id.ICNTL(7) = icntl; 2146 } 2147 2148 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2149 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2150 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2151 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2152 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2153 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2154 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2155 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2156 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2157 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2158 if (flg) { 2159 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2160 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2161 } 2162 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2163 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2164 PetscCall(MatDestroy(&F->schur)); 2165 PetscCall(MatMumpsResetSchur_Private(mumps)); 2166 } 2167 2168 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2169 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2170 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2171 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2172 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2173 In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 2174 */ 2175 #if PETSC_PKG_MUMPS_VERSION_GE(5, 6, 2) && defined(PETSC_HAVE_MUMPS_AVOID_MPI_IN_PLACE) 2176 mumps->ICNTL20 = 10; 2177 #elif PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2178 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 2179 #else 2180 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 2181 #endif 2182 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2183 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2184 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2185 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2186 #endif 2187 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2188 2189 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2190 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2191 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2192 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2193 2194 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2195 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2196 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2197 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2198 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2199 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2200 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2201 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2202 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2203 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2204 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2205 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2206 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2207 2208 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2209 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2210 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2211 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2212 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2213 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2214 2215 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2216 2217 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2218 if (ninfo) { 2219 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2220 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2221 mumps->ninfo = ninfo; 2222 for (i = 0; i < ninfo; i++) { 2223 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2224 mumps->info[i] = info[i]; 2225 } 2226 } 2227 PetscOptionsEnd(); 2228 PetscFunctionReturn(PETSC_SUCCESS); 2229 } 2230 2231 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2232 { 2233 PetscFunctionBegin; 2234 if (mumps->id.INFOG(1) < 0) { 2235 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2236 if (mumps->id.INFOG(1) == -6) { 2237 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2238 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2239 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2240 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2241 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2242 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2243 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2244 } else { 2245 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2246 F->factorerrortype = MAT_FACTOR_OTHER; 2247 } 2248 } 2249 PetscFunctionReturn(PETSC_SUCCESS); 2250 } 2251 2252 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2253 { 2254 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2255 Vec b; 2256 const PetscInt M = A->rmap->N; 2257 2258 PetscFunctionBegin; 2259 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2260 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2261 PetscFunctionReturn(PETSC_SUCCESS); 2262 } 2263 2264 /* Set MUMPS options from the options database */ 2265 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2266 2267 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2268 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2269 2270 /* analysis phase */ 2271 mumps->id.job = JOB_FACTSYMBOLIC; 2272 mumps->id.n = M; 2273 switch (mumps->id.ICNTL(18)) { 2274 case 0: /* centralized assembled matrix input */ 2275 if (!mumps->myid) { 2276 mumps->id.nnz = mumps->nnz; 2277 mumps->id.irn = mumps->irn; 2278 mumps->id.jcn = mumps->jcn; 2279 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2280 if (r) { 2281 mumps->id.ICNTL(7) = 1; 2282 if (!mumps->myid) { 2283 const PetscInt *idx; 2284 PetscInt i; 2285 2286 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2287 PetscCall(ISGetIndices(r, &idx)); 2288 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2289 PetscCall(ISRestoreIndices(r, &idx)); 2290 } 2291 } 2292 } 2293 break; 2294 case 3: /* distributed assembled matrix input (size>1) */ 2295 mumps->id.nnz_loc = mumps->nnz; 2296 mumps->id.irn_loc = mumps->irn; 2297 mumps->id.jcn_loc = mumps->jcn; 2298 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2299 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2300 PetscCall(MatCreateVecs(A, NULL, &b)); 2301 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2302 PetscCall(VecDestroy(&b)); 2303 } 2304 break; 2305 } 2306 PetscMUMPS_c(mumps); 2307 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2308 2309 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2310 F->ops->solve = MatSolve_MUMPS; 2311 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2312 F->ops->matsolve = MatMatSolve_MUMPS; 2313 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2314 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2315 2316 mumps->matstruc = SAME_NONZERO_PATTERN; 2317 PetscFunctionReturn(PETSC_SUCCESS); 2318 } 2319 2320 /* Note the Petsc r and c permutations are ignored */ 2321 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2322 { 2323 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2324 Vec b; 2325 const PetscInt M = A->rmap->N; 2326 2327 PetscFunctionBegin; 2328 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2329 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2330 PetscFunctionReturn(PETSC_SUCCESS); 2331 } 2332 2333 /* Set MUMPS options from the options database */ 2334 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2335 2336 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2337 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2338 2339 /* analysis phase */ 2340 mumps->id.job = JOB_FACTSYMBOLIC; 2341 mumps->id.n = M; 2342 switch (mumps->id.ICNTL(18)) { 2343 case 0: /* centralized assembled matrix input */ 2344 if (!mumps->myid) { 2345 mumps->id.nnz = mumps->nnz; 2346 mumps->id.irn = mumps->irn; 2347 mumps->id.jcn = mumps->jcn; 2348 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2349 } 2350 break; 2351 case 3: /* distributed assembled matrix input (size>1) */ 2352 mumps->id.nnz_loc = mumps->nnz; 2353 mumps->id.irn_loc = mumps->irn; 2354 mumps->id.jcn_loc = mumps->jcn; 2355 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2356 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2357 PetscCall(MatCreateVecs(A, NULL, &b)); 2358 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2359 PetscCall(VecDestroy(&b)); 2360 } 2361 break; 2362 } 2363 PetscMUMPS_c(mumps); 2364 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2365 2366 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2367 F->ops->solve = MatSolve_MUMPS; 2368 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2369 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2370 2371 mumps->matstruc = SAME_NONZERO_PATTERN; 2372 PetscFunctionReturn(PETSC_SUCCESS); 2373 } 2374 2375 /* Note the Petsc r permutation and factor info are ignored */ 2376 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2377 { 2378 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2379 Vec b; 2380 const PetscInt M = A->rmap->N; 2381 2382 PetscFunctionBegin; 2383 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2384 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2385 PetscFunctionReturn(PETSC_SUCCESS); 2386 } 2387 2388 /* Set MUMPS options from the options database */ 2389 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2390 2391 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2392 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2393 2394 /* analysis phase */ 2395 mumps->id.job = JOB_FACTSYMBOLIC; 2396 mumps->id.n = M; 2397 switch (mumps->id.ICNTL(18)) { 2398 case 0: /* centralized assembled matrix input */ 2399 if (!mumps->myid) { 2400 mumps->id.nnz = mumps->nnz; 2401 mumps->id.irn = mumps->irn; 2402 mumps->id.jcn = mumps->jcn; 2403 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2404 } 2405 break; 2406 case 3: /* distributed assembled matrix input (size>1) */ 2407 mumps->id.nnz_loc = mumps->nnz; 2408 mumps->id.irn_loc = mumps->irn; 2409 mumps->id.jcn_loc = mumps->jcn; 2410 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2411 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2412 PetscCall(MatCreateVecs(A, NULL, &b)); 2413 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2414 PetscCall(VecDestroy(&b)); 2415 } 2416 break; 2417 } 2418 PetscMUMPS_c(mumps); 2419 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2420 2421 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2422 F->ops->solve = MatSolve_MUMPS; 2423 F->ops->solvetranspose = MatSolve_MUMPS; 2424 F->ops->matsolve = MatMatSolve_MUMPS; 2425 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2426 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2427 #if defined(PETSC_USE_COMPLEX) 2428 F->ops->getinertia = NULL; 2429 #else 2430 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2431 #endif 2432 2433 mumps->matstruc = SAME_NONZERO_PATTERN; 2434 PetscFunctionReturn(PETSC_SUCCESS); 2435 } 2436 2437 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2438 { 2439 PetscBool iascii; 2440 PetscViewerFormat format; 2441 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2442 2443 PetscFunctionBegin; 2444 /* check if matrix is mumps type */ 2445 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2446 2447 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2448 if (iascii) { 2449 PetscCall(PetscViewerGetFormat(viewer, &format)); 2450 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2451 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2452 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2453 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2454 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2455 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2456 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2457 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2458 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2459 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2460 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2461 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2462 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2463 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2464 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2465 if (mumps->id.ICNTL(11) > 0) { 2466 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2467 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2468 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2469 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2470 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2471 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2472 } 2473 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2474 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2475 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2476 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2477 /* ICNTL(15-17) not used */ 2478 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2479 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2480 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2481 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2482 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2483 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2484 2485 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2486 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2487 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2488 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2489 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2490 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2491 2492 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2493 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2494 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2495 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2496 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2497 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2498 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2499 2500 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2501 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2502 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2503 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2504 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2505 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2506 2507 /* information local to each processor */ 2508 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2509 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2510 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2511 PetscCall(PetscViewerFlush(viewer)); 2512 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2513 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2514 PetscCall(PetscViewerFlush(viewer)); 2515 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2516 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2517 PetscCall(PetscViewerFlush(viewer)); 2518 2519 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2520 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2521 PetscCall(PetscViewerFlush(viewer)); 2522 2523 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2524 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2525 PetscCall(PetscViewerFlush(viewer)); 2526 2527 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2528 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2529 PetscCall(PetscViewerFlush(viewer)); 2530 2531 if (mumps->ninfo && mumps->ninfo <= 80) { 2532 PetscInt i; 2533 for (i = 0; i < mumps->ninfo; i++) { 2534 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2535 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2536 PetscCall(PetscViewerFlush(viewer)); 2537 } 2538 } 2539 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2540 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : "")); 2541 2542 if (mumps->myid == 0) { /* information from the host */ 2543 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2544 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2545 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2546 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2547 2548 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2549 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2550 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2551 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2552 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2553 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2554 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2555 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2556 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2557 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2558 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2559 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2560 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2561 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2562 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2563 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2564 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2565 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 2566 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2567 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2568 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2569 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2570 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2571 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2572 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2573 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2574 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2575 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2576 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2577 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2578 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2579 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2580 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2581 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2582 } 2583 } 2584 } 2585 PetscFunctionReturn(PETSC_SUCCESS); 2586 } 2587 2588 static PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2589 { 2590 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2591 2592 PetscFunctionBegin; 2593 info->block_size = 1.0; 2594 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2595 info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2596 info->nz_unneeded = 0.0; 2597 info->assemblies = 0.0; 2598 info->mallocs = 0.0; 2599 info->memory = 0.0; 2600 info->fill_ratio_given = 0; 2601 info->fill_ratio_needed = 0; 2602 info->factor_mallocs = 0; 2603 PetscFunctionReturn(PETSC_SUCCESS); 2604 } 2605 2606 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2607 { 2608 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2609 const PetscScalar *arr; 2610 const PetscInt *idxs; 2611 PetscInt size, i; 2612 2613 PetscFunctionBegin; 2614 PetscCall(ISGetLocalSize(is, &size)); 2615 /* Schur complement matrix */ 2616 PetscCall(MatDestroy(&F->schur)); 2617 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2618 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2619 mumps->id.schur = (MumpsScalar *)arr; 2620 mumps->id.size_schur = size; 2621 mumps->id.schur_lld = size; 2622 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2623 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2624 2625 /* MUMPS expects Fortran style indices */ 2626 PetscCall(PetscFree(mumps->id.listvar_schur)); 2627 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2628 PetscCall(ISGetIndices(is, &idxs)); 2629 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2630 PetscCall(ISRestoreIndices(is, &idxs)); 2631 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2632 mumps->id.ICNTL(26) = -1; 2633 PetscFunctionReturn(PETSC_SUCCESS); 2634 } 2635 2636 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2637 { 2638 Mat St; 2639 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2640 PetscScalar *array; 2641 2642 PetscFunctionBegin; 2643 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it"); 2644 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2645 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2646 PetscCall(MatSetType(St, MATDENSE)); 2647 PetscCall(MatSetUp(St)); 2648 PetscCall(MatDenseGetArray(St, &array)); 2649 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2650 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2651 PetscInt i, j, N = mumps->id.size_schur; 2652 for (i = 0; i < N; i++) { 2653 for (j = 0; j < N; j++) { 2654 #if !defined(PETSC_USE_COMPLEX) 2655 PetscScalar val = mumps->id.schur[i * N + j]; 2656 #else 2657 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2658 #endif 2659 array[j * N + i] = val; 2660 } 2661 } 2662 } else { /* stored by columns */ 2663 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2664 } 2665 } else { /* either full or lower-triangular (not packed) */ 2666 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2667 PetscInt i, j, N = mumps->id.size_schur; 2668 for (i = 0; i < N; i++) { 2669 for (j = i; j < N; j++) { 2670 #if !defined(PETSC_USE_COMPLEX) 2671 PetscScalar val = mumps->id.schur[i * N + j]; 2672 #else 2673 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2674 #endif 2675 array[i * N + j] = array[j * N + i] = val; 2676 } 2677 } 2678 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2679 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2680 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2681 PetscInt i, j, N = mumps->id.size_schur; 2682 for (i = 0; i < N; i++) { 2683 for (j = 0; j < i + 1; j++) { 2684 #if !defined(PETSC_USE_COMPLEX) 2685 PetscScalar val = mumps->id.schur[i * N + j]; 2686 #else 2687 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2688 #endif 2689 array[i * N + j] = array[j * N + i] = val; 2690 } 2691 } 2692 } 2693 } 2694 PetscCall(MatDenseRestoreArray(St, &array)); 2695 *S = St; 2696 PetscFunctionReturn(PETSC_SUCCESS); 2697 } 2698 2699 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2700 { 2701 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2702 2703 PetscFunctionBegin; 2704 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2705 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2706 for (i = 0; i < nICNTL_pre; ++i) 2707 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2708 if (i == nICNTL_pre) { /* not already cached */ 2709 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2710 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2711 mumps->ICNTL_pre[0]++; 2712 } 2713 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2714 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2715 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2716 PetscFunctionReturn(PETSC_SUCCESS); 2717 } 2718 2719 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2720 { 2721 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2722 2723 PetscFunctionBegin; 2724 if (mumps->id.job == JOB_NULL) { 2725 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2726 *ival = 0; 2727 for (i = 0; i < nICNTL_pre; ++i) { 2728 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2729 } 2730 } else *ival = mumps->id.ICNTL(icntl); 2731 PetscFunctionReturn(PETSC_SUCCESS); 2732 } 2733 2734 /*@ 2735 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2736 2737 Logically Collective 2738 2739 Input Parameters: 2740 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2741 . icntl - index of MUMPS parameter array ICNTL() 2742 - ival - value of MUMPS ICNTL(icntl) 2743 2744 Options Database Key: 2745 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2746 2747 Level: beginner 2748 2749 References: 2750 . * - MUMPS Users' Guide 2751 2752 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2753 @*/ 2754 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2755 { 2756 PetscFunctionBegin; 2757 PetscValidType(F, 1); 2758 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2759 PetscValidLogicalCollectiveInt(F, icntl, 2); 2760 PetscValidLogicalCollectiveInt(F, ival, 3); 2761 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2762 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2763 PetscFunctionReturn(PETSC_SUCCESS); 2764 } 2765 2766 /*@ 2767 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2768 2769 Logically Collective 2770 2771 Input Parameters: 2772 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2773 - icntl - index of MUMPS parameter array ICNTL() 2774 2775 Output Parameter: 2776 . ival - value of MUMPS ICNTL(icntl) 2777 2778 Level: beginner 2779 2780 References: 2781 . * - MUMPS Users' Guide 2782 2783 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2784 @*/ 2785 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2786 { 2787 PetscFunctionBegin; 2788 PetscValidType(F, 1); 2789 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2790 PetscValidLogicalCollectiveInt(F, icntl, 2); 2791 PetscAssertPointer(ival, 3); 2792 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2793 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2794 PetscFunctionReturn(PETSC_SUCCESS); 2795 } 2796 2797 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2798 { 2799 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2800 2801 PetscFunctionBegin; 2802 if (mumps->id.job == JOB_NULL) { 2803 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2804 for (i = 0; i < nCNTL_pre; ++i) 2805 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2806 if (i == nCNTL_pre) { 2807 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2808 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2809 mumps->CNTL_pre[0]++; 2810 } 2811 mumps->CNTL_pre[1 + 2 * i] = icntl; 2812 mumps->CNTL_pre[2 + 2 * i] = val; 2813 } else mumps->id.CNTL(icntl) = val; 2814 PetscFunctionReturn(PETSC_SUCCESS); 2815 } 2816 2817 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2818 { 2819 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2820 2821 PetscFunctionBegin; 2822 if (mumps->id.job == JOB_NULL) { 2823 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2824 *val = 0.0; 2825 for (i = 0; i < nCNTL_pre; ++i) { 2826 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2827 } 2828 } else *val = mumps->id.CNTL(icntl); 2829 PetscFunctionReturn(PETSC_SUCCESS); 2830 } 2831 2832 /*@ 2833 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2834 2835 Logically Collective 2836 2837 Input Parameters: 2838 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2839 . icntl - index of MUMPS parameter array CNTL() 2840 - val - value of MUMPS CNTL(icntl) 2841 2842 Options Database Key: 2843 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2844 2845 Level: beginner 2846 2847 References: 2848 . * - MUMPS Users' Guide 2849 2850 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2851 @*/ 2852 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2853 { 2854 PetscFunctionBegin; 2855 PetscValidType(F, 1); 2856 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2857 PetscValidLogicalCollectiveInt(F, icntl, 2); 2858 PetscValidLogicalCollectiveReal(F, val, 3); 2859 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2860 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2861 PetscFunctionReturn(PETSC_SUCCESS); 2862 } 2863 2864 /*@ 2865 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2866 2867 Logically Collective 2868 2869 Input Parameters: 2870 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2871 - icntl - index of MUMPS parameter array CNTL() 2872 2873 Output Parameter: 2874 . val - value of MUMPS CNTL(icntl) 2875 2876 Level: beginner 2877 2878 References: 2879 . * - MUMPS Users' Guide 2880 2881 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2882 @*/ 2883 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2884 { 2885 PetscFunctionBegin; 2886 PetscValidType(F, 1); 2887 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2888 PetscValidLogicalCollectiveInt(F, icntl, 2); 2889 PetscAssertPointer(val, 3); 2890 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2891 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2892 PetscFunctionReturn(PETSC_SUCCESS); 2893 } 2894 2895 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2896 { 2897 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2898 2899 PetscFunctionBegin; 2900 *info = mumps->id.INFO(icntl); 2901 PetscFunctionReturn(PETSC_SUCCESS); 2902 } 2903 2904 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2905 { 2906 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2907 2908 PetscFunctionBegin; 2909 *infog = mumps->id.INFOG(icntl); 2910 PetscFunctionReturn(PETSC_SUCCESS); 2911 } 2912 2913 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2914 { 2915 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2916 2917 PetscFunctionBegin; 2918 *rinfo = mumps->id.RINFO(icntl); 2919 PetscFunctionReturn(PETSC_SUCCESS); 2920 } 2921 2922 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2923 { 2924 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2925 2926 PetscFunctionBegin; 2927 *rinfog = mumps->id.RINFOG(icntl); 2928 PetscFunctionReturn(PETSC_SUCCESS); 2929 } 2930 2931 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2932 { 2933 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2934 2935 PetscFunctionBegin; 2936 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2937 *size = 0; 2938 *array = NULL; 2939 if (!mumps->myid) { 2940 *size = mumps->id.INFOG(28); 2941 PetscCall(PetscMalloc1(*size, array)); 2942 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2943 } 2944 PetscFunctionReturn(PETSC_SUCCESS); 2945 } 2946 2947 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2948 { 2949 Mat Bt = NULL, Btseq = NULL; 2950 PetscBool flg; 2951 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2952 PetscScalar *aa; 2953 PetscInt spnr, *ia, *ja, M, nrhs; 2954 2955 PetscFunctionBegin; 2956 PetscAssertPointer(spRHS, 2); 2957 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2958 if (flg) { 2959 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2960 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2961 2962 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2963 2964 if (mumps->petsc_size > 1) { 2965 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2966 Btseq = b->A; 2967 } else { 2968 Btseq = Bt; 2969 } 2970 2971 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2972 mumps->id.nrhs = nrhs; 2973 mumps->id.lrhs = M; 2974 mumps->id.rhs = NULL; 2975 2976 if (!mumps->myid) { 2977 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2978 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2979 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2980 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2981 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2982 } else { 2983 mumps->id.irhs_ptr = NULL; 2984 mumps->id.irhs_sparse = NULL; 2985 mumps->id.nz_rhs = 0; 2986 mumps->id.rhs_sparse = NULL; 2987 } 2988 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2989 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2990 2991 /* solve phase */ 2992 mumps->id.job = JOB_SOLVE; 2993 PetscMUMPS_c(mumps); 2994 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2995 2996 if (!mumps->myid) { 2997 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2998 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2999 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3000 } 3001 PetscFunctionReturn(PETSC_SUCCESS); 3002 } 3003 3004 /*@ 3005 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 3006 3007 Logically Collective 3008 3009 Input Parameter: 3010 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3011 3012 Output Parameter: 3013 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3014 3015 Level: beginner 3016 3017 References: 3018 . * - MUMPS Users' Guide 3019 3020 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3021 @*/ 3022 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3023 { 3024 PetscFunctionBegin; 3025 PetscValidType(F, 1); 3026 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3027 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3028 PetscFunctionReturn(PETSC_SUCCESS); 3029 } 3030 3031 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3032 { 3033 Mat spRHS; 3034 3035 PetscFunctionBegin; 3036 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3037 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3038 PetscCall(MatDestroy(&spRHS)); 3039 PetscFunctionReturn(PETSC_SUCCESS); 3040 } 3041 3042 /*@ 3043 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 3044 3045 Logically Collective 3046 3047 Input Parameter: 3048 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3049 3050 Output Parameter: 3051 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3052 3053 Level: beginner 3054 3055 References: 3056 . * - MUMPS Users' Guide 3057 3058 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3059 @*/ 3060 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3061 { 3062 PetscBool flg; 3063 3064 PetscFunctionBegin; 3065 PetscValidType(F, 1); 3066 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3067 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3068 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3069 3070 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3071 PetscFunctionReturn(PETSC_SUCCESS); 3072 } 3073 3074 /*@ 3075 MatMumpsGetInfo - Get MUMPS parameter INFO() 3076 3077 Logically Collective 3078 3079 Input Parameters: 3080 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3081 - icntl - index of MUMPS parameter array INFO() 3082 3083 Output Parameter: 3084 . ival - value of MUMPS INFO(icntl) 3085 3086 Level: beginner 3087 3088 References: 3089 . * - MUMPS Users' Guide 3090 3091 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3092 @*/ 3093 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3094 { 3095 PetscFunctionBegin; 3096 PetscValidType(F, 1); 3097 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3098 PetscAssertPointer(ival, 3); 3099 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3100 PetscFunctionReturn(PETSC_SUCCESS); 3101 } 3102 3103 /*@ 3104 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3105 3106 Logically Collective 3107 3108 Input Parameters: 3109 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3110 - icntl - index of MUMPS parameter array INFOG() 3111 3112 Output Parameter: 3113 . ival - value of MUMPS INFOG(icntl) 3114 3115 Level: beginner 3116 3117 References: 3118 . * - MUMPS Users' Guide 3119 3120 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3121 @*/ 3122 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3123 { 3124 PetscFunctionBegin; 3125 PetscValidType(F, 1); 3126 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3127 PetscAssertPointer(ival, 3); 3128 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3129 PetscFunctionReturn(PETSC_SUCCESS); 3130 } 3131 3132 /*@ 3133 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3134 3135 Logically Collective 3136 3137 Input Parameters: 3138 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3139 - icntl - index of MUMPS parameter array RINFO() 3140 3141 Output Parameter: 3142 . val - value of MUMPS RINFO(icntl) 3143 3144 Level: beginner 3145 3146 References: 3147 . * - MUMPS Users' Guide 3148 3149 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3150 @*/ 3151 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3152 { 3153 PetscFunctionBegin; 3154 PetscValidType(F, 1); 3155 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3156 PetscAssertPointer(val, 3); 3157 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3158 PetscFunctionReturn(PETSC_SUCCESS); 3159 } 3160 3161 /*@ 3162 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3163 3164 Logically Collective 3165 3166 Input Parameters: 3167 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3168 - icntl - index of MUMPS parameter array RINFOG() 3169 3170 Output Parameter: 3171 . val - value of MUMPS RINFOG(icntl) 3172 3173 Level: beginner 3174 3175 References: 3176 . * - MUMPS Users' Guide 3177 3178 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3179 @*/ 3180 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3181 { 3182 PetscFunctionBegin; 3183 PetscValidType(F, 1); 3184 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3185 PetscAssertPointer(val, 3); 3186 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3187 PetscFunctionReturn(PETSC_SUCCESS); 3188 } 3189 3190 /*@ 3191 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3192 3193 Logically Collective 3194 3195 Input Parameter: 3196 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3197 3198 Output Parameters: 3199 + size - local size of the array. The size of the array is non-zero only on the host. 3200 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3201 for freeing this array. 3202 3203 Level: beginner 3204 3205 References: 3206 . * - MUMPS Users' Guide 3207 3208 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3209 @*/ 3210 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3211 { 3212 PetscFunctionBegin; 3213 PetscValidType(F, 1); 3214 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3215 PetscAssertPointer(size, 2); 3216 PetscAssertPointer(array, 3); 3217 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3218 PetscFunctionReturn(PETSC_SUCCESS); 3219 } 3220 3221 /*MC 3222 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3223 distributed and sequential matrices via the external package MUMPS. 3224 3225 Works with `MATAIJ` and `MATSBAIJ` matrices 3226 3227 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3228 3229 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3230 See details below. 3231 3232 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3233 3234 Options Database Keys: 3235 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3236 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 3237 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3238 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3239 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3240 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3241 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3242 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3243 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3244 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3245 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3246 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3247 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3248 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3249 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3250 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3251 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3252 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3253 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3254 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3255 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3256 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 3257 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3258 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3259 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3260 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3261 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3262 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3263 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3264 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3265 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3266 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3267 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3268 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3269 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3270 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3271 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3272 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3273 3274 Level: beginner 3275 3276 Notes: 3277 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 3278 error if the matrix is Hermitian. 3279 3280 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3281 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3282 3283 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3284 the failure with 3285 .vb 3286 KSPGetPC(ksp,&pc); 3287 PCFactorGetMatrix(pc,&mat); 3288 MatMumpsGetInfo(mat,....); 3289 MatMumpsGetInfog(mat,....); etc. 3290 .ve 3291 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3292 3293 MUMPS provides 64-bit integer support in two build modes: 3294 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3295 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3296 3297 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3298 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3299 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3300 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3301 3302 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3303 3304 Two modes to run MUMPS/PETSc with OpenMP 3305 .vb 3306 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3307 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3308 .ve 3309 3310 .vb 3311 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3312 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3313 .ve 3314 3315 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3316 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3317 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3318 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3319 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 3320 3321 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3322 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3323 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3324 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 3325 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3326 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 3327 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3328 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3329 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3330 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3331 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3332 examine the mapping result. 3333 3334 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3335 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3336 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3337 3338 References: 3339 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3340 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 3341 3342 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3343 M*/ 3344 3345 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3346 { 3347 PetscFunctionBegin; 3348 *type = MATSOLVERMUMPS; 3349 PetscFunctionReturn(PETSC_SUCCESS); 3350 } 3351 3352 /* MatGetFactor for Seq and MPI AIJ matrices */ 3353 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3354 { 3355 Mat B; 3356 Mat_MUMPS *mumps; 3357 PetscBool isSeqAIJ, isDiag; 3358 PetscMPIInt size; 3359 3360 PetscFunctionBegin; 3361 #if defined(PETSC_USE_COMPLEX) 3362 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3363 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3364 *F = NULL; 3365 PetscFunctionReturn(PETSC_SUCCESS); 3366 } 3367 #endif 3368 /* Create the factorization matrix */ 3369 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3370 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3371 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3372 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3373 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3374 PetscCall(MatSetUp(B)); 3375 3376 PetscCall(PetscNew(&mumps)); 3377 3378 B->ops->view = MatView_MUMPS; 3379 B->ops->getinfo = MatGetInfo_MUMPS; 3380 3381 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3382 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3383 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3384 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3385 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3386 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3387 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3388 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3389 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3390 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3391 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3392 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3393 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3394 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3395 3396 if (ftype == MAT_FACTOR_LU) { 3397 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3398 B->factortype = MAT_FACTOR_LU; 3399 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3400 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3401 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3402 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3403 mumps->sym = 0; 3404 } else { 3405 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3406 B->factortype = MAT_FACTOR_CHOLESKY; 3407 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3408 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3409 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3410 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3411 #if defined(PETSC_USE_COMPLEX) 3412 mumps->sym = 2; 3413 #else 3414 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3415 else mumps->sym = 2; 3416 #endif 3417 } 3418 3419 /* set solvertype */ 3420 PetscCall(PetscFree(B->solvertype)); 3421 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3422 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3423 if (size == 1) { 3424 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3425 B->canuseordering = PETSC_TRUE; 3426 } 3427 B->ops->destroy = MatDestroy_MUMPS; 3428 B->data = (void *)mumps; 3429 3430 *F = B; 3431 mumps->id.job = JOB_NULL; 3432 mumps->ICNTL_pre = NULL; 3433 mumps->CNTL_pre = NULL; 3434 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3435 PetscFunctionReturn(PETSC_SUCCESS); 3436 } 3437 3438 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3439 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3440 { 3441 Mat B; 3442 Mat_MUMPS *mumps; 3443 PetscBool isSeqSBAIJ; 3444 PetscMPIInt size; 3445 3446 PetscFunctionBegin; 3447 #if defined(PETSC_USE_COMPLEX) 3448 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3449 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3450 *F = NULL; 3451 PetscFunctionReturn(PETSC_SUCCESS); 3452 } 3453 #endif 3454 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3455 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3456 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3457 PetscCall(MatSetUp(B)); 3458 3459 PetscCall(PetscNew(&mumps)); 3460 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3461 if (isSeqSBAIJ) { 3462 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3463 } else { 3464 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3465 } 3466 3467 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3468 B->ops->view = MatView_MUMPS; 3469 B->ops->getinfo = MatGetInfo_MUMPS; 3470 3471 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3472 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3473 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3474 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3475 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3476 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3477 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3478 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3479 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3480 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3481 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3482 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3483 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3484 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3485 3486 B->factortype = MAT_FACTOR_CHOLESKY; 3487 #if defined(PETSC_USE_COMPLEX) 3488 mumps->sym = 2; 3489 #else 3490 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3491 else mumps->sym = 2; 3492 #endif 3493 3494 /* set solvertype */ 3495 PetscCall(PetscFree(B->solvertype)); 3496 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3497 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3498 if (size == 1) { 3499 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3500 B->canuseordering = PETSC_TRUE; 3501 } 3502 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3503 B->ops->destroy = MatDestroy_MUMPS; 3504 B->data = (void *)mumps; 3505 3506 *F = B; 3507 mumps->id.job = JOB_NULL; 3508 mumps->ICNTL_pre = NULL; 3509 mumps->CNTL_pre = NULL; 3510 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3511 PetscFunctionReturn(PETSC_SUCCESS); 3512 } 3513 3514 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3515 { 3516 Mat B; 3517 Mat_MUMPS *mumps; 3518 PetscBool isSeqBAIJ; 3519 PetscMPIInt size; 3520 3521 PetscFunctionBegin; 3522 /* Create the factorization matrix */ 3523 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3524 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3525 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3526 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3527 PetscCall(MatSetUp(B)); 3528 3529 PetscCall(PetscNew(&mumps)); 3530 if (ftype == MAT_FACTOR_LU) { 3531 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3532 B->factortype = MAT_FACTOR_LU; 3533 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3534 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3535 mumps->sym = 0; 3536 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3537 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3538 3539 B->ops->view = MatView_MUMPS; 3540 B->ops->getinfo = MatGetInfo_MUMPS; 3541 3542 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3543 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3544 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3545 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3546 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3547 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3548 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3549 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3550 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3551 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3552 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3553 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3554 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3555 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3556 3557 /* set solvertype */ 3558 PetscCall(PetscFree(B->solvertype)); 3559 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3560 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3561 if (size == 1) { 3562 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3563 B->canuseordering = PETSC_TRUE; 3564 } 3565 B->ops->destroy = MatDestroy_MUMPS; 3566 B->data = (void *)mumps; 3567 3568 *F = B; 3569 mumps->id.job = JOB_NULL; 3570 mumps->ICNTL_pre = NULL; 3571 mumps->CNTL_pre = NULL; 3572 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3573 PetscFunctionReturn(PETSC_SUCCESS); 3574 } 3575 3576 /* MatGetFactor for Seq and MPI SELL matrices */ 3577 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3578 { 3579 Mat B; 3580 Mat_MUMPS *mumps; 3581 PetscBool isSeqSELL; 3582 PetscMPIInt size; 3583 3584 PetscFunctionBegin; 3585 /* Create the factorization matrix */ 3586 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3587 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3588 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3589 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3590 PetscCall(MatSetUp(B)); 3591 3592 PetscCall(PetscNew(&mumps)); 3593 3594 B->ops->view = MatView_MUMPS; 3595 B->ops->getinfo = MatGetInfo_MUMPS; 3596 3597 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3598 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3599 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3600 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3601 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3602 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3603 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3604 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3605 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3606 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3607 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3608 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3609 3610 if (ftype == MAT_FACTOR_LU) { 3611 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3612 B->factortype = MAT_FACTOR_LU; 3613 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3614 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3615 mumps->sym = 0; 3616 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3617 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3618 3619 /* set solvertype */ 3620 PetscCall(PetscFree(B->solvertype)); 3621 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3622 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3623 if (size == 1) { 3624 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3625 B->canuseordering = PETSC_TRUE; 3626 } 3627 B->ops->destroy = MatDestroy_MUMPS; 3628 B->data = (void *)mumps; 3629 3630 *F = B; 3631 mumps->id.job = JOB_NULL; 3632 mumps->ICNTL_pre = NULL; 3633 mumps->CNTL_pre = NULL; 3634 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3635 PetscFunctionReturn(PETSC_SUCCESS); 3636 } 3637 3638 /* MatGetFactor for MATNEST matrices */ 3639 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3640 { 3641 Mat B, **mats; 3642 Mat_MUMPS *mumps; 3643 PetscInt nr, nc; 3644 PetscMPIInt size; 3645 PetscBool flg = PETSC_TRUE; 3646 3647 PetscFunctionBegin; 3648 #if defined(PETSC_USE_COMPLEX) 3649 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3650 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3651 *F = NULL; 3652 PetscFunctionReturn(PETSC_SUCCESS); 3653 } 3654 #endif 3655 3656 /* Return if some condition is not satisfied */ 3657 *F = NULL; 3658 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3659 if (ftype == MAT_FACTOR_CHOLESKY) { 3660 IS *rows, *cols; 3661 PetscInt *m, *M; 3662 3663 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3664 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3665 PetscCall(MatNestGetISs(A, rows, cols)); 3666 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3667 if (!flg) { 3668 PetscCall(PetscFree2(rows, cols)); 3669 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 3670 PetscFunctionReturn(PETSC_SUCCESS); 3671 } 3672 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3673 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3674 for (PetscInt r = 0; flg && r < nr; r++) 3675 for (PetscInt k = r + 1; flg && k < nr; k++) 3676 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3677 PetscCall(PetscFree2(m, M)); 3678 PetscCall(PetscFree2(rows, cols)); 3679 if (!flg) { 3680 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 3681 PetscFunctionReturn(PETSC_SUCCESS); 3682 } 3683 } 3684 3685 for (PetscInt r = 0; r < nr; r++) { 3686 for (PetscInt c = 0; c < nc; c++) { 3687 Mat sub = mats[r][c]; 3688 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag; 3689 3690 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3691 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 3692 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 3693 else { 3694 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 3695 if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 3696 } 3697 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3698 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3699 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3700 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3701 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3702 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3703 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 3704 if (ftype == MAT_FACTOR_CHOLESKY) { 3705 if (r == c) { 3706 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag) { 3707 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3708 flg = PETSC_FALSE; 3709 } 3710 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag) { 3711 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3712 flg = PETSC_FALSE; 3713 } 3714 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag) { 3715 PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3716 flg = PETSC_FALSE; 3717 } 3718 } 3719 } 3720 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3721 3722 /* Create the factorization matrix */ 3723 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3724 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3725 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3726 PetscCall(MatSetUp(B)); 3727 3728 PetscCall(PetscNew(&mumps)); 3729 3730 B->ops->view = MatView_MUMPS; 3731 B->ops->getinfo = MatGetInfo_MUMPS; 3732 3733 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3734 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3735 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3736 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3737 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3738 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3739 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3740 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3741 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3742 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3743 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3744 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3745 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3746 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3747 3748 if (ftype == MAT_FACTOR_LU) { 3749 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3750 B->factortype = MAT_FACTOR_LU; 3751 mumps->sym = 0; 3752 } else { 3753 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3754 B->factortype = MAT_FACTOR_CHOLESKY; 3755 #if defined(PETSC_USE_COMPLEX) 3756 mumps->sym = 2; 3757 #else 3758 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3759 else mumps->sym = 2; 3760 #endif 3761 } 3762 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3763 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3764 3765 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3766 if (size == 1) { 3767 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3768 B->canuseordering = PETSC_TRUE; 3769 } 3770 3771 /* set solvertype */ 3772 PetscCall(PetscFree(B->solvertype)); 3773 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3774 B->ops->destroy = MatDestroy_MUMPS; 3775 B->data = (void *)mumps; 3776 3777 *F = B; 3778 mumps->id.job = JOB_NULL; 3779 mumps->ICNTL_pre = NULL; 3780 mumps->CNTL_pre = NULL; 3781 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3782 PetscFunctionReturn(PETSC_SUCCESS); 3783 } 3784 3785 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3786 { 3787 PetscFunctionBegin; 3788 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3789 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3790 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3791 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3792 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3793 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3794 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3795 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3796 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3797 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3798 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3799 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3800 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3801 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3802 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 3803 PetscFunctionReturn(PETSC_SUCCESS); 3804 } 3805