1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <petscsf.h> 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 9 #include <../src/mat/impls/sell/mpi/mpisell.h> 10 11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #if defined(PETSC_USE_REAL_SINGLE) 16 #include <cmumps_c.h> 17 #else 18 #include <zmumps_c.h> 19 #endif 20 #else 21 #if defined(PETSC_USE_REAL_SINGLE) 22 #include <smumps_c.h> 23 #else 24 #include <dmumps_c.h> 25 #endif 26 #endif 27 EXTERN_C_END 28 #define JOB_INIT -1 29 #define JOB_NULL 0 30 #define JOB_FACTSYMBOLIC 1 31 #define JOB_FACTNUMERIC 2 32 #define JOB_SOLVE 3 33 #define JOB_END -2 34 35 /* calls to MUMPS */ 36 #if defined(PETSC_USE_COMPLEX) 37 #if defined(PETSC_USE_REAL_SINGLE) 38 #define MUMPS_c cmumps_c 39 #else 40 #define MUMPS_c zmumps_c 41 #endif 42 #else 43 #if defined(PETSC_USE_REAL_SINGLE) 44 #define MUMPS_c smumps_c 45 #else 46 #define MUMPS_c dmumps_c 47 #endif 48 #endif 49 50 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 51 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 52 naming convention in PetscMPIInt, PetscBLASInt etc. 53 */ 54 typedef MUMPS_INT PetscMUMPSInt; 55 56 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 57 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 58 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 59 #endif 60 #else 61 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 62 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 63 #endif 64 #endif 65 66 #define MPIU_MUMPSINT MPI_INT 67 #define PETSC_MUMPS_INT_MAX 2147483647 68 #define PETSC_MUMPS_INT_MIN -2147483648 69 70 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 71 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 72 { 73 PetscFunctionBegin; 74 #if PetscDefined(USE_64BIT_INDICES) 75 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 76 #endif 77 *b = (PetscMUMPSInt)(a); 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 /* Put these utility routines here since they are only used in this file */ 82 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 83 { 84 PetscInt myval; 85 PetscBool myset; 86 PetscFunctionBegin; 87 /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 88 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 89 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 90 if (set) *set = myset; 91 PetscFunctionReturn(PETSC_SUCCESS); 92 } 93 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 94 95 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 96 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 97 #define PetscMUMPS_c(mumps) \ 98 do { \ 99 if (mumps->use_petsc_omp_support) { \ 100 if (mumps->is_omp_master) { \ 101 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 102 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 103 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 104 PetscCall(PetscFPTrapPop()); \ 105 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 106 } \ 107 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 108 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 109 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 110 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 111 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 112 */ \ 113 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 114 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 115 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 116 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 117 } else { \ 118 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 119 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 120 PetscCall(PetscFPTrapPop()); \ 121 } \ 122 } while (0) 123 #else 124 #define PetscMUMPS_c(mumps) \ 125 do { \ 126 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 127 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 128 PetscCall(PetscFPTrapPop()); \ 129 } while (0) 130 #endif 131 132 /* declare MumpsScalar */ 133 #if defined(PETSC_USE_COMPLEX) 134 #if defined(PETSC_USE_REAL_SINGLE) 135 #define MumpsScalar mumps_complex 136 #else 137 #define MumpsScalar mumps_double_complex 138 #endif 139 #else 140 #define MumpsScalar PetscScalar 141 #endif 142 143 /* macros s.t. indices match MUMPS documentation */ 144 #define ICNTL(I) icntl[(I)-1] 145 #define CNTL(I) cntl[(I)-1] 146 #define INFOG(I) infog[(I)-1] 147 #define INFO(I) info[(I)-1] 148 #define RINFOG(I) rinfog[(I)-1] 149 #define RINFO(I) rinfo[(I)-1] 150 151 typedef struct Mat_MUMPS Mat_MUMPS; 152 struct Mat_MUMPS { 153 #if defined(PETSC_USE_COMPLEX) 154 #if defined(PETSC_USE_REAL_SINGLE) 155 CMUMPS_STRUC_C id; 156 #else 157 ZMUMPS_STRUC_C id; 158 #endif 159 #else 160 #if defined(PETSC_USE_REAL_SINGLE) 161 SMUMPS_STRUC_C id; 162 #else 163 DMUMPS_STRUC_C id; 164 #endif 165 #endif 166 167 MatStructure matstruc; 168 PetscMPIInt myid, petsc_size; 169 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. 
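They are allocated together with a single PetscMalloc2() in the MatConvertToTriples_* routines during the symbolic phase (MAT_INITIAL_MATRIX) and then reused unchanged on later numeric factorizations (MAT_REUSE_MATRIX); they are freed in MatDestroy_MUMPS().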
*/ 170 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 171 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 172 PetscMUMPSInt sym; 173 MPI_Comm mumps_comm; 174 PetscMUMPSInt *ICNTL_pre; 175 PetscReal *CNTL_pre; 176 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 177 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 178 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 179 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 180 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 181 PetscInt *rhs_nrow, max_nrhs; 182 PetscMPIInt *rhs_recvcounts, *rhs_disps; 183 PetscScalar *rhs_loc, *rhs_recvbuf; 184 #endif 185 Vec b_seq, x_seq; 186 PetscInt ninfo, *info; /* which INFO to display */ 187 PetscInt sizeredrhs; 188 PetscScalar *schur_sol; 189 PetscInt schur_sizesol; 190 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 191 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 192 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 193 194 /* Support for MATNEST */ 195 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 196 PetscInt64 *nest_vals_start; 197 PetscScalar *nest_vals; 198 199 /* stuff used by petsc/mumps OpenMP support*/ 200 PetscBool use_petsc_omp_support; 201 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 202 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 203 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 204 PetscMPIInt tag, omp_comm_size; 205 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 206 MPI_Request *reqs; 207 }; 208 209 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 210 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 211 */ 212 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 213 { 214 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
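Since ia[0] == 1 in the 1-based format, ia[nrow] - 1 is exactly the number of nonzeros.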
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 215 216 PetscFunctionBegin; 217 #if defined(PETSC_USE_64BIT_INDICES) 218 { 219 PetscInt i; 220 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 221 PetscCall(PetscFree(mumps->ia_alloc)); 222 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 223 mumps->cur_ilen = nrow + 1; 224 } 225 if (nnz > mumps->cur_jlen) { 226 PetscCall(PetscFree(mumps->ja_alloc)); 227 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 228 mumps->cur_jlen = nnz; 229 } 230 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 231 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 232 *ia_mumps = mumps->ia_alloc; 233 *ja_mumps = mumps->ja_alloc; 234 } 235 #else 236 *ia_mumps = ia; 237 *ja_mumps = ja; 238 #endif 239 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 240 PetscFunctionReturn(PETSC_SUCCESS); 241 } 242 243 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 244 { 245 PetscFunctionBegin; 246 PetscCall(PetscFree(mumps->id.listvar_schur)); 247 PetscCall(PetscFree(mumps->id.redrhs)); 248 PetscCall(PetscFree(mumps->schur_sol)); 249 mumps->id.size_schur = 0; 250 mumps->id.schur_lld = 0; 251 mumps->id.ICNTL(19) = 0; 252 PetscFunctionReturn(PETSC_SUCCESS); 253 } 254 255 /* solve with rhs in mumps->id.redrhs and return in the same location */ 256 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 257 { 258 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 259 Mat S, B, X; 260 MatFactorSchurStatus schurstatus; 261 PetscInt sizesol; 262 263 PetscFunctionBegin; 264 PetscCall(MatFactorFactorizeSchurComplement(F)); 265 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 266 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 267 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 268 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 269 PetscCall(MatBindToCPU(B, S->boundtocpu)); 270 #endif 271 switch (schurstatus) { 272 case MAT_FACTOR_SCHUR_FACTORED: 273 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 274 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 275 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 276 PetscCall(MatBindToCPU(X, S->boundtocpu)); 277 #endif 278 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 279 PetscCall(MatMatSolveTranspose(S, B, X)); 280 } else { 281 PetscCall(MatMatSolve(S, B, X)); 282 } 283 break; 284 case MAT_FACTOR_SCHUR_INVERTED: 285 sizesol = mumps->id.nrhs * mumps->id.size_schur; 286 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 287 PetscCall(PetscFree(mumps->schur_sol)); 288 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 289 mumps->schur_sizesol = sizesol; 290 } 291 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 292 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 293 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 294 PetscCall(MatBindToCPU(X, S->boundtocpu)); 295 #endif 296 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 297 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 298 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 299 } else { 300 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 301 } 302 PetscCall(MatProductSetFromOptions(X)); 303 PetscCall(MatProductSymbolic(X)); 304 PetscCall(MatProductNumeric(X)); 305 306 
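/* X now holds inv(S)*B (or inv(S)^T*B when ICNTL(9) requests a transpose solve); copy it into B, which wraps id.redrhs, so the expansion phase finds the reduced solution where MUMPS expects it */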
PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 307 break; 308 default: 309 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 310 } 311 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 312 PetscCall(MatDestroy(&B)); 313 PetscCall(MatDestroy(&X)); 314 PetscFunctionReturn(PETSC_SUCCESS); 315 } 316 317 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 318 { 319 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 320 321 PetscFunctionBegin; 322 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 if (!expansion) { /* prepare for the condensation step */ 326 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 327 /* allocate MUMPS internal array to store reduced right-hand sides */ 328 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 329 PetscCall(PetscFree(mumps->id.redrhs)); 330 mumps->id.lredrhs = mumps->id.size_schur; 331 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 332 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 333 } 334 } else { /* prepare for the expansion step */ 335 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 336 PetscCall(MatMumpsSolveSchur_Private(F)); 337 mumps->id.ICNTL(26) = 2; /* expansion phase */ 338 PetscMUMPS_c(mumps); 339 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 340 /* restore defaults */ 341 mumps->id.ICNTL(26) = -1; 342 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 343 if (mumps->id.nrhs > 1) { 344 PetscCall(PetscFree(mumps->id.redrhs)); 345 mumps->id.lredrhs = 0; 346 mumps->sizeredrhs = 0; 347 } 348 } 349 PetscFunctionReturn(PETSC_SUCCESS); 350 } 351 352 /* 353 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 354 355 input: 356 A - matrix in aij,baij or sbaij format 357 shift - 0: C style output triple; 1: Fortran style output triple. 358 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 359 MAT_REUSE_MATRIX: only the values in v array are updated 360 output: 361 nnz - dim of r, c, and v (number of local nonzero entries of A) 362 r, c, v - row and col index, matrix values (matrix triples) 363 364 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 365 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 366 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
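As a small example of the output convention: converting the 2x2 SeqAIJ matrix [1 2; 0 3] with shift = 1 gives nnz = 3 and the Fortran-style triples r = {1, 1, 2}, c = {1, 2, 2}, v = {1, 2, 3}.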
367 368 */ 369 370 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 371 { 372 const PetscScalar *av; 373 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 374 PetscInt64 nz, rnz, i, j, k; 375 PetscMUMPSInt *row, *col; 376 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 377 378 PetscFunctionBegin; 379 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 380 if (reuse == MAT_INITIAL_MATRIX) { 381 nz = aa->nz; 382 ai = aa->i; 383 aj = aa->j; 384 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 385 for (i = k = 0; i < M; i++) { 386 rnz = ai[i + 1] - ai[i]; 387 ajj = aj + ai[i]; 388 for (j = 0; j < rnz; j++) { 389 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 390 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 391 k++; 392 } 393 } 394 mumps->val = (PetscScalar *)av; 395 mumps->irn = row; 396 mumps->jcn = col; 397 mumps->nnz = nz; 398 } else PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); 399 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 404 { 405 PetscInt64 nz, i, j, k, r; 406 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 407 PetscMUMPSInt *row, *col; 408 409 PetscFunctionBegin; 410 nz = a->sliidx[a->totalslices]; 411 if (reuse == MAT_INITIAL_MATRIX) { 412 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 413 for (i = k = 0; i < a->totalslices; i++) { 414 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 415 } 416 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 417 mumps->irn = row; 418 mumps->jcn = col; 419 mumps->nnz = nz; 420 mumps->val = a->val; 421 } else PetscCall(PetscArraycpy(mumps->val, a->val, nz)); 422 PetscFunctionReturn(PETSC_SUCCESS); 423 } 424 425 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 426 { 427 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 428 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 429 PetscInt64 M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 430 PetscInt bs; 431 PetscMUMPSInt *row, *col; 432 433 PetscFunctionBegin; 434 if (reuse == MAT_INITIAL_MATRIX) { 435 PetscCall(MatGetBlockSize(A, &bs)); 436 M = A->rmap->N / bs; 437 ai = aa->i; 438 aj = aa->j; 439 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 440 for (i = 0; i < M; i++) { 441 ajj = aj + ai[i]; 442 rnz = ai[i + 1] - ai[i]; 443 for (k = 0; k < rnz; k++) { 444 for (j = 0; j < bs; j++) { 445 for (m = 0; m < bs; m++) { 446 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 447 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 448 idx++; 449 } 450 } 451 } 452 } 453 mumps->irn = row; 454 mumps->jcn = col; 455 mumps->nnz = nz; 456 mumps->val = aa->a; 457 } else PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); 458 PetscFunctionReturn(PETSC_SUCCESS); 459 } 460 461 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 462 { 463 const PetscInt *ai, *aj, *ajj; 464 PetscInt bs; 465 PetscInt64 nz, rnz, i, j, k, m; 466 PetscMUMPSInt *row, *col; 467 PetscScalar *val; 468 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 469 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 470 #if defined(PETSC_USE_COMPLEX) 471 PetscBool isset, hermitian; 472 #endif 473 474 PetscFunctionBegin; 475 #if defined(PETSC_USE_COMPLEX) 476 PetscCall(MatIsHermitianKnown(A, &isset, 
&hermitian)); 477 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 478 #endif 479 ai = aa->i; 480 aj = aa->j; 481 PetscCall(MatGetBlockSize(A, &bs)); 482 if (reuse == MAT_INITIAL_MATRIX) { 483 const PetscInt64 alloc_size = aa->nz * bs2; 484 485 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 486 if (bs > 1) { 487 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 488 mumps->val = mumps->val_alloc; 489 } else { 490 mumps->val = aa->a; 491 } 492 mumps->irn = row; 493 mumps->jcn = col; 494 } else { 495 row = mumps->irn; 496 col = mumps->jcn; 497 } 498 val = mumps->val; 499 500 nz = 0; 501 if (bs > 1) { 502 for (i = 0; i < mbs; i++) { 503 rnz = ai[i + 1] - ai[i]; 504 ajj = aj + ai[i]; 505 for (j = 0; j < rnz; j++) { 506 for (k = 0; k < bs; k++) { 507 for (m = 0; m < bs; m++) { 508 if (ajj[j] > i || k >= m) { 509 if (reuse == MAT_INITIAL_MATRIX) { 510 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 511 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 512 } 513 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 514 } 515 } 516 } 517 } 518 } 519 } else if (reuse == MAT_INITIAL_MATRIX) { 520 for (i = 0; i < mbs; i++) { 521 rnz = ai[i + 1] - ai[i]; 522 ajj = aj + ai[i]; 523 for (j = 0; j < rnz; j++) { 524 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 525 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 526 nz++; 527 } 528 } 529 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 530 } else PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX */ 531 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 532 PetscFunctionReturn(PETSC_SUCCESS); 533 } 534 535 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 536 { 537 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 538 PetscInt64 nz, rnz, i, j; 539 const PetscScalar *av, *v1; 540 PetscScalar *val; 541 PetscMUMPSInt *row, *col; 542 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 543 PetscBool missing; 544 #if defined(PETSC_USE_COMPLEX) 545 PetscBool hermitian, isset; 546 #endif 547 548 PetscFunctionBegin; 549 #if defined(PETSC_USE_COMPLEX) 550 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 551 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 552 #endif 553 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 554 ai = aa->i; 555 aj = aa->j; 556 adiag = aa->diag; 557 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 558 if (reuse == MAT_INITIAL_MATRIX) { 559 /* count nz in the upper triangular part of A */ 560 nz = 0; 561 if (missing) { 562 for (i = 0; i < M; i++) { 563 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 564 for (j = ai[i]; j < ai[i + 1]; j++) { 565 if (aj[j] < i) continue; 566 nz++; 567 } 568 } else { 569 nz += ai[i + 1] - adiag[i]; 570 } 571 } 572 } else { 573 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 574 } 575 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 576 PetscCall(PetscMalloc1(nz, &val)); 577 mumps->nnz = nz; 578 mumps->irn = row; 579 mumps->jcn = col; 580 mumps->val = mumps->val_alloc = val; 581 582 nz = 0; 583 if (missing) { 584 for (i = 0; i < M; i++) { 585 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 586 for (j = ai[i]; j < ai[i + 1]; j++) { 587 if (aj[j] < i) continue; 588 
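/* aj[j] >= i: keep only entries on or above the diagonal for the symmetric MUMPS input; rows with a missing diagonal are scanned entry by entry because adiag[i] does not mark the start of their upper triangular part */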
PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 589 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 590 val[nz] = av[j]; 591 nz++; 592 } 593 } else { 594 rnz = ai[i + 1] - adiag[i]; 595 ajj = aj + adiag[i]; 596 v1 = av + adiag[i]; 597 for (j = 0; j < rnz; j++) { 598 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 599 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 600 val[nz++] = v1[j]; 601 } 602 } 603 } 604 } else { 605 for (i = 0; i < M; i++) { 606 rnz = ai[i + 1] - adiag[i]; 607 ajj = aj + adiag[i]; 608 v1 = av + adiag[i]; 609 for (j = 0; j < rnz; j++) { 610 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 611 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 612 val[nz++] = v1[j]; 613 } 614 } 615 } 616 } else { 617 nz = 0; 618 val = mumps->val; 619 if (missing) { 620 for (i = 0; i < M; i++) { 621 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 622 for (j = ai[i]; j < ai[i + 1]; j++) { 623 if (aj[j] < i) continue; 624 val[nz++] = av[j]; 625 } 626 } else { 627 rnz = ai[i + 1] - adiag[i]; 628 v1 = av + adiag[i]; 629 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 630 } 631 } 632 } else { 633 for (i = 0; i < M; i++) { 634 rnz = ai[i + 1] - adiag[i]; 635 v1 = av + adiag[i]; 636 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 637 } 638 } 639 } 640 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 641 PetscFunctionReturn(PETSC_SUCCESS); 642 } 643 644 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 645 { 646 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 647 PetscInt bs; 648 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 649 PetscMUMPSInt *row, *col; 650 const PetscScalar *av, *bv, *v1, *v2; 651 PetscScalar *val; 652 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 653 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 654 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 655 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 656 #if defined(PETSC_USE_COMPLEX) 657 PetscBool hermitian, isset; 658 #endif 659 660 PetscFunctionBegin; 661 #if defined(PETSC_USE_COMPLEX) 662 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 663 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 664 #endif 665 PetscCall(MatGetBlockSize(A, &bs)); 666 rstart = A->rmap->rstart; 667 ai = aa->i; 668 aj = aa->j; 669 bi = bb->i; 670 bj = bb->j; 671 av = aa->a; 672 bv = bb->a; 673 674 garray = mat->garray; 675 676 if (reuse == MAT_INITIAL_MATRIX) { 677 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 678 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 679 PetscCall(PetscMalloc1(nz, &val)); 680 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 681 mumps->irn = row; 682 mumps->jcn = col; 683 mumps->val = mumps->val_alloc = val; 684 } else { 685 val = mumps->val; 686 } 687 688 jj = 0; 689 irow = rstart; 690 for (i = 0; i < mbs; i++) { 691 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 692 countA = ai[i + 1] - ai[i]; 693 countB = bi[i + 1] - bi[i]; 694 bjj = bj + bi[i]; 695 v1 = av + ai[i] * bs2; 696 v2 = bv + bi[i] * bs2; 697 698 if (bs > 1) { 699 /* A-part */ 700 for (j = 0; j < countA; j++) { 701 for (k = 0; k < bs; k++) { 702 for (m = 0; m < bs; m++) { 703 if (rstart + ajj[j] * bs > irow || k >= m) { 704 if (reuse == MAT_INITIAL_MATRIX) { 705 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 706 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 707 } 708 
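/* blocks are stored column-major, so entry (row m, col k) of block j is v1[j * bs2 + m + k * bs]; off-diagonal blocks are taken whole while diagonal blocks contribute only their upper triangle (k >= m) */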
val[jj++] = v1[j * bs2 + m + k * bs]; 709 } 710 } 711 } 712 } 713 714 /* B-part */ 715 for (j = 0; j < countB; j++) { 716 for (k = 0; k < bs; k++) { 717 for (m = 0; m < bs; m++) { 718 if (reuse == MAT_INITIAL_MATRIX) { 719 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 720 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 721 } 722 val[jj++] = v2[j * bs2 + m + k * bs]; 723 } 724 } 725 } 726 } else { 727 /* A-part */ 728 for (j = 0; j < countA; j++) { 729 if (reuse == MAT_INITIAL_MATRIX) { 730 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 731 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 732 } 733 val[jj++] = v1[j]; 734 } 735 736 /* B-part */ 737 for (j = 0; j < countB; j++) { 738 if (reuse == MAT_INITIAL_MATRIX) { 739 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 740 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 741 } 742 val[jj++] = v2[j]; 743 } 744 } 745 irow += bs; 746 } 747 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 748 PetscFunctionReturn(PETSC_SUCCESS); 749 } 750 751 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 752 { 753 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 754 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 755 PetscMUMPSInt *row, *col; 756 const PetscScalar *av, *bv, *v1, *v2; 757 PetscScalar *val; 758 Mat Ad, Ao; 759 Mat_SeqAIJ *aa; 760 Mat_SeqAIJ *bb; 761 762 PetscFunctionBegin; 763 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 764 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 765 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 766 767 aa = (Mat_SeqAIJ *)(Ad)->data; 768 bb = (Mat_SeqAIJ *)(Ao)->data; 769 ai = aa->i; 770 aj = aa->j; 771 bi = bb->i; 772 bj = bb->j; 773 774 rstart = A->rmap->rstart; 775 cstart = A->cmap->rstart; 776 777 if (reuse == MAT_INITIAL_MATRIX) { 778 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 779 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 780 PetscCall(PetscMalloc1(nz, &val)); 781 mumps->nnz = nz; 782 mumps->irn = row; 783 mumps->jcn = col; 784 mumps->val = mumps->val_alloc = val; 785 } else { 786 val = mumps->val; 787 } 788 789 jj = 0; 790 irow = rstart; 791 for (i = 0; i < m; i++) { 792 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 793 countA = ai[i + 1] - ai[i]; 794 countB = bi[i + 1] - bi[i]; 795 bjj = bj + bi[i]; 796 v1 = av + ai[i]; 797 v2 = bv + bi[i]; 798 799 /* A-part */ 800 for (j = 0; j < countA; j++) { 801 if (reuse == MAT_INITIAL_MATRIX) { 802 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 803 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 804 } 805 val[jj++] = v1[j]; 806 } 807 808 /* B-part */ 809 for (j = 0; j < countB; j++) { 810 if (reuse == MAT_INITIAL_MATRIX) { 811 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 812 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 813 } 814 val[jj++] = v2[j]; 815 } 816 irow++; 817 } 818 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 819 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 820 PetscFunctionReturn(PETSC_SUCCESS); 821 } 822 823 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 824 { 825 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 826 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 827 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 828 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 829 const PetscInt 
*garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 830 const PetscInt bs2 = mat->bs2; 831 PetscInt bs; 832 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 833 PetscMUMPSInt *row, *col; 834 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 835 PetscScalar *val; 836 837 PetscFunctionBegin; 838 PetscCall(MatGetBlockSize(A, &bs)); 839 if (reuse == MAT_INITIAL_MATRIX) { 840 nz = bs2 * (aa->nz + bb->nz); 841 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 842 PetscCall(PetscMalloc1(nz, &val)); 843 mumps->nnz = nz; 844 mumps->irn = row; 845 mumps->jcn = col; 846 mumps->val = mumps->val_alloc = val; 847 } else { 848 val = mumps->val; 849 } 850 851 jj = 0; 852 irow = rstart; 853 for (i = 0; i < mbs; i++) { 854 countA = ai[i + 1] - ai[i]; 855 countB = bi[i + 1] - bi[i]; 856 ajj = aj + ai[i]; 857 bjj = bj + bi[i]; 858 v1 = av + bs2 * ai[i]; 859 v2 = bv + bs2 * bi[i]; 860 861 idx = 0; 862 /* A-part */ 863 for (k = 0; k < countA; k++) { 864 for (j = 0; j < bs; j++) { 865 for (n = 0; n < bs; n++) { 866 if (reuse == MAT_INITIAL_MATRIX) { 867 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 868 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 869 } 870 val[jj++] = v1[idx++]; 871 } 872 } 873 } 874 875 idx = 0; 876 /* B-part */ 877 for (k = 0; k < countB; k++) { 878 for (j = 0; j < bs; j++) { 879 for (n = 0; n < bs; n++) { 880 if (reuse == MAT_INITIAL_MATRIX) { 881 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 882 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 883 } 884 val[jj++] = v2[idx++]; 885 } 886 } 887 } 888 irow += bs; 889 } 890 PetscFunctionReturn(PETSC_SUCCESS); 891 } 892 893 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 894 { 895 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 896 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 897 PetscMUMPSInt *row, *col; 898 const PetscScalar *av, *bv, *v1, *v2; 899 PetscScalar *val; 900 Mat Ad, Ao; 901 Mat_SeqAIJ *aa; 902 Mat_SeqAIJ *bb; 903 #if defined(PETSC_USE_COMPLEX) 904 PetscBool hermitian, isset; 905 #endif 906 907 PetscFunctionBegin; 908 #if defined(PETSC_USE_COMPLEX) 909 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 910 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 911 #endif 912 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 913 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 914 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 915 916 aa = (Mat_SeqAIJ *)(Ad)->data; 917 bb = (Mat_SeqAIJ *)(Ao)->data; 918 ai = aa->i; 919 aj = aa->j; 920 adiag = aa->diag; 921 bi = bb->i; 922 bj = bb->j; 923 924 rstart = A->rmap->rstart; 925 926 if (reuse == MAT_INITIAL_MATRIX) { 927 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 928 nzb = 0; /* num of upper triangular entries in mat->B */ 929 for (i = 0; i < m; i++) { 930 nza += (ai[i + 1] - adiag[i]); 931 countB = bi[i + 1] - bi[i]; 932 bjj = bj + bi[i]; 933 for (j = 0; j < countB; j++) { 934 if (garray[bjj[j]] > rstart) nzb++; 935 } 936 } 937 938 nz = nza + nzb; /* total nz of upper triangular part of mat */ 939 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 940 PetscCall(PetscMalloc1(nz, &val)); 941 mumps->nnz = nz; 942 mumps->irn = row; 943 mumps->jcn = col; 944 mumps->val = mumps->val_alloc = val; 945 } else { 946 val = mumps->val; 947 } 948 
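/* Fill the triples of the upper triangular part: the diagonal (A) portion of each row is walked starting at adiag[i], and an off-diagonal (B) entry is kept when its global column garray[bjj[j]] > rstart, which in the MPIAIJ layout means it lies to the right of the diagonal block and hence above the diagonal */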
949 jj = 0; 950 irow = rstart; 951 for (i = 0; i < m; i++) { 952 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 953 v1 = av + adiag[i]; 954 countA = ai[i + 1] - adiag[i]; 955 countB = bi[i + 1] - bi[i]; 956 bjj = bj + bi[i]; 957 v2 = bv + bi[i]; 958 959 /* A-part */ 960 for (j = 0; j < countA; j++) { 961 if (reuse == MAT_INITIAL_MATRIX) { 962 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 963 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 964 } 965 val[jj++] = v1[j]; 966 } 967 968 /* B-part */ 969 for (j = 0; j < countB; j++) { 970 if (garray[bjj[j]] > rstart) { 971 if (reuse == MAT_INITIAL_MATRIX) { 972 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 973 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 974 } 975 val[jj++] = v2[j]; 976 } 977 } 978 irow++; 979 } 980 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 981 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 982 PetscFunctionReturn(PETSC_SUCCESS); 983 } 984 985 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 986 { 987 Mat **mats; 988 PetscInt nr, nc; 989 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 990 991 PetscFunctionBegin; 992 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 993 if (reuse == MAT_INITIAL_MATRIX) { 994 PetscMUMPSInt *irns, *jcns; 995 PetscScalar *vals; 996 PetscInt64 totnnz, cumnnz, maxnnz; 997 PetscInt *pjcns_w; 998 IS *rows, *cols; 999 PetscInt **rows_idx, **cols_idx; 1000 1001 cumnnz = 0; 1002 maxnnz = 0; 1003 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1004 for (PetscInt r = 0; r < nr; r++) { 1005 for (PetscInt c = 0; c < nc; c++) { 1006 Mat sub = mats[r][c]; 1007 1008 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1009 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1010 if (sub) { 1011 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1012 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE; 1013 MatInfo info; 1014 1015 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1016 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1017 else { 1018 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1019 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1020 } 1021 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1022 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1023 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1024 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1025 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1026 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1027 1028 if (chol) { 1029 if (r == c) { 1030 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1031 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1032 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1033 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1034 } else { 1035 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1036 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1037 
else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1038 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1039 } 1040 } else { 1041 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1042 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1043 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1044 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1045 } 1046 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1047 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1048 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1049 cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 1050 maxnnz = PetscMax(maxnnz, info.nz_used); 1051 } 1052 } 1053 } 1054 1055 /* Allocate total COO */ 1056 totnnz = cumnnz; 1057 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1058 PetscCall(PetscMalloc1(totnnz, &vals)); 1059 1060 /* Handle rows and column maps 1061 We directly map rows and use an SF for the columns */ 1062 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1063 PetscCall(MatNestGetISs(A, rows, cols)); 1064 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1065 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1066 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1067 else (void)maxnnz; 1068 1069 cumnnz = 0; 1070 for (PetscInt r = 0; r < nr; r++) { 1071 for (PetscInt c = 0; c < nc; c++) { 1072 Mat sub = mats[r][c]; 1073 const PetscInt *ridx = rows_idx[r]; 1074 const PetscInt *cidx = cols_idx[c]; 1075 PetscInt rst; 1076 PetscSF csf; 1077 PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 1078 PetscLayout cmap; 1079 1080 mumps->nest_vals_start[r * nc + c] = cumnnz; 1081 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1082 1083 /* Extract inner blocks if needed */ 1084 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1085 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1086 else { 1087 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1088 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1089 } 1090 swap = (PetscBool)(isTrans || isHTrans); 1091 1092 /* Get column layout to map off-process columns */ 1093 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1094 1095 /* Get row start to map on-process rows */ 1096 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1097 1098 /* Directly use the mumps datastructure and use C ordering for now */ 1099 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1100 1101 /* Swap the role of rows and columns indices for transposed blocks 1102 since we need values with global final ordering */ 1103 if (swap) { 1104 cidx = rows_idx[r]; 1105 ridx = cols_idx[c]; 1106 } 1107 1108 /* Communicate column indices 1109 This could have been done with a single SF but it would have complicated the code a lot. 
1110 But since we do it only once, we pay the price of setting up an SF for each block */ 1111 if (PetscDefined(USE_64BIT_INDICES)) { 1112 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1113 } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 1114 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1115 PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1116 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1117 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1118 PetscCall(PetscSFDestroy(&csf)); 1119 1120 /* Import indices: use direct map for rows and mapped indices for columns */ 1121 if (swap) { 1122 for (PetscInt k = 0; k < mumps->nnz; k++) { 1123 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1124 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1125 } 1126 } else { 1127 for (PetscInt k = 0; k < mumps->nnz; k++) { 1128 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1129 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1130 } 1131 } 1132 1133 /* Import values to full COO */ 1134 PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1135 if (isHTrans) { /* conjugate the entries */ 1136 PetscScalar *v = vals + cumnnz; 1137 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]); 1138 } 1139 1140 /* Shift new starting point and sanity check */ 1141 cumnnz += mumps->nnz; 1142 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1143 1144 /* Free scratch memory */ 1145 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1146 PetscCall(PetscFree(mumps->val_alloc)); 1147 mumps->val = NULL; 1148 mumps->nnz = 0; 1149 } 1150 } 1151 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1152 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1153 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1154 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1155 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1156 mumps->nest_vals_start[nr * nc] = cumnnz; 1157 1158 /* Set pointers for final MUMPS data structure */ 1159 mumps->nest_vals = vals; 1160 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1161 mumps->val = vals; 1162 mumps->irn = irns; 1163 mumps->jcn = jcns; 1164 mumps->nnz = cumnnz; 1165 } else { 1166 PetscScalar *oval = mumps->nest_vals; 1167 for (PetscInt r = 0; r < nr; r++) { 1168 for (PetscInt c = 0; c < nc; c++) { 1169 PetscBool isTrans, isHTrans = PETSC_FALSE; 1170 Mat sub = mats[r][c]; 1171 PetscInt midx = r * nc + c; 1172 1173 if (!mumps->nest_convert_to_triples[midx]) continue; 1174 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1175 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1176 else { 1177 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1178 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1179 } 1180 mumps->val = oval + mumps->nest_vals_start[midx]; 1181 
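/* point mumps->val at this block's slice of the aggregated value array so the block's conversion routine refreshes the values in place; the irn/jcn indices set up with MAT_INITIAL_MATRIX are reused unchanged */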
PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1182 if (isHTrans) { 1183 PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1184 for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1185 } 1186 } 1187 } 1188 mumps->val = oval; 1189 } 1190 PetscFunctionReturn(PETSC_SUCCESS); 1191 } 1192 1193 static PetscErrorCode MatDestroy_MUMPS(Mat A) 1194 { 1195 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1196 1197 PetscFunctionBegin; 1198 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1199 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1200 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1201 PetscCall(VecDestroy(&mumps->b_seq)); 1202 PetscCall(VecDestroy(&mumps->x_seq)); 1203 PetscCall(PetscFree(mumps->id.perm_in)); 1204 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1205 PetscCall(PetscFree(mumps->val_alloc)); 1206 PetscCall(PetscFree(mumps->info)); 1207 PetscCall(PetscFree(mumps->ICNTL_pre)); 1208 PetscCall(PetscFree(mumps->CNTL_pre)); 1209 PetscCall(MatMumpsResetSchur_Private(mumps)); 1210 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1211 mumps->id.job = JOB_END; 1212 PetscMUMPS_c(mumps); 1213 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1214 if (mumps->mumps_comm != MPI_COMM_NULL) { 1215 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1216 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1217 } 1218 } 1219 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1220 if (mumps->use_petsc_omp_support) { 1221 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1222 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1223 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1224 } 1225 #endif 1226 PetscCall(PetscFree(mumps->ia_alloc)); 1227 PetscCall(PetscFree(mumps->ja_alloc)); 1228 PetscCall(PetscFree(mumps->recvcount)); 1229 PetscCall(PetscFree(mumps->reqs)); 1230 PetscCall(PetscFree(mumps->irhs_loc)); 1231 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1232 PetscCall(PetscFree(mumps->nest_vals)); 1233 PetscCall(PetscFree(A->data)); 1234 1235 /* clear composed functions */ 1236 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1237 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1238 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1239 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1240 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1241 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1242 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1243 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1244 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1245 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1246 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1247 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1248 
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1249 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1250 PetscFunctionReturn(PETSC_SUCCESS); 1251 } 1252 1253 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1254 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1255 { 1256 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1257 const PetscMPIInt ompsize = mumps->omp_comm_size; 1258 PetscInt i, m, M, rstart; 1259 1260 PetscFunctionBegin; 1261 PetscCall(MatGetSize(A, &M, NULL)); 1262 PetscCall(MatGetLocalSize(A, &m, NULL)); 1263 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1264 if (ompsize == 1) { 1265 if (!mumps->irhs_loc) { 1266 mumps->nloc_rhs = m; 1267 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1268 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1269 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1270 } 1271 mumps->id.rhs_loc = (MumpsScalar *)array; 1272 } else { 1273 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1274 const PetscInt *ranges; 1275 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1276 MPI_Group petsc_group, omp_group; 1277 PetscScalar *recvbuf = NULL; 1278 1279 if (mumps->is_omp_master) { 1280 /* Lazily initialize the omp stuff for distributed rhs */ 1281 if (!mumps->irhs_loc) { 1282 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1283 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1284 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1285 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1286 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1287 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1288 1289 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1290 mumps->nloc_rhs = 0; 1291 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1292 for (j = 0; j < ompsize; j++) { 1293 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1294 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1295 } 1296 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1297 for (j = k = 0; j < ompsize; j++) { 1298 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1299 } 1300 1301 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1302 PetscCallMPI(MPI_Group_free(&petsc_group)); 1303 PetscCallMPI(MPI_Group_free(&omp_group)); 1304 } 1305 1306 /* Realloc buffers when current nrhs is bigger than what we have met */ 1307 if (nrhs > mumps->max_nrhs) { 1308 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1309 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1310 mumps->max_nrhs = nrhs; 1311 } 1312 1313 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1314 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1315 mumps->rhs_disps[0] = 0; 1316 for (j = 1; j < ompsize; j++) { 1317 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1318 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1319 } 1320 recvbuf = 
(nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1321 } 1322 1323 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1324 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1325 1326 if (mumps->is_omp_master) { 1327 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1328 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1329 for (j = 0; j < ompsize; j++) { 1330 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1331 dst = dstbase; 1332 for (i = 0; i < nrhs; i++) { 1333 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1334 src += mumps->rhs_nrow[j]; 1335 dst += mumps->nloc_rhs; 1336 } 1337 dstbase += mumps->rhs_nrow[j]; 1338 } 1339 } 1340 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1341 } 1342 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1343 } 1344 mumps->id.nrhs = nrhs; 1345 mumps->id.nloc_rhs = mumps->nloc_rhs; 1346 mumps->id.lrhs_loc = mumps->nloc_rhs; 1347 mumps->id.irhs_loc = mumps->irhs_loc; 1348 PetscFunctionReturn(PETSC_SUCCESS); 1349 } 1350 1351 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1352 { 1353 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1354 const PetscScalar *rarray = NULL; 1355 PetscScalar *array; 1356 IS is_iden, is_petsc; 1357 PetscInt i; 1358 PetscBool second_solve = PETSC_FALSE; 1359 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1360 1361 PetscFunctionBegin; 1362 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1363 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1364 &cite1)); 1365 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1366 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1367 &cite2)); 1368 1369 if (A->factorerrortype) { 1370 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1371 PetscCall(VecSetInf(x)); 1372 PetscFunctionReturn(PETSC_SUCCESS); 1373 } 1374 1375 mumps->id.nrhs = 1; 1376 if (mumps->petsc_size > 1) { 1377 if (mumps->ICNTL20 == 10) { 1378 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1379 PetscCall(VecGetArrayRead(b, &rarray)); 1380 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1381 } else { 1382 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1383 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1384 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1385 if (!mumps->myid) { 1386 PetscCall(VecGetArray(mumps->b_seq, &array)); 1387 mumps->id.rhs = (MumpsScalar *)array; 1388 } 1389 } 1390 } else { /* petsc_size == 1 */ 1391 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1392 PetscCall(VecCopy(b, x)); 1393 PetscCall(VecGetArray(x, &array)); 1394 mumps->id.rhs = (MumpsScalar *)array; 1395 } 1396 1397 /* 1398 handle condensation step of Schur complement (if any) 1399 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1400 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1401 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1402 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1403 */ 1404 if (mumps->id.size_schur > 0) { 1405 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1406 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1407 second_solve = PETSC_TRUE; 1408 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1409 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1410 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1411 } 1412 /* solve phase */ 1413 mumps->id.job = JOB_SOLVE; 1414 PetscMUMPS_c(mumps); 1415 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1416 1417 /* handle expansion step of Schur complement (if any) */ 1418 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1419 else if (mumps->id.ICNTL(26) == 1) { 1420 PetscCall(MatMumpsSolveSchur_Private(A)); 1421 for (i = 0; i < mumps->id.size_schur; ++i) { 1422 #if !defined(PETSC_USE_COMPLEX) 1423 PetscScalar val = mumps->id.redrhs[i]; 1424 #else 1425 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1426 #endif 1427 array[mumps->id.listvar_schur[i] - 1] = val; 1428 } 1429 } 1430 1431 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1432 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1433 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1434 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1435 } 1436 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1437 PetscInt *isol2_loc 
= NULL; 1438 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1439 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1440 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1441 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1442 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1443 PetscCall(ISDestroy(&is_iden)); 1444 PetscCall(ISDestroy(&is_petsc)); 1445 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1446 } 1447 1448 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1449 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1450 } 1451 1452 if (mumps->petsc_size > 1) { 1453 if (mumps->ICNTL20 == 10) { 1454 PetscCall(VecRestoreArrayRead(b, &rarray)); 1455 } else if (!mumps->myid) { 1456 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1457 } 1458 } else PetscCall(VecRestoreArray(x, &array)); 1459 1460 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1461 PetscFunctionReturn(PETSC_SUCCESS); 1462 } 1463 1464 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1465 { 1466 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1467 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1468 1469 PetscFunctionBegin; 1470 mumps->id.ICNTL(9) = 0; 1471 PetscCall(MatSolve_MUMPS(A, b, x)); 1472 mumps->id.ICNTL(9) = value; 1473 PetscFunctionReturn(PETSC_SUCCESS); 1474 } 1475 1476 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1477 { 1478 Mat Bt = NULL; 1479 PetscBool denseX, denseB, flg, flgT; 1480 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1481 PetscInt i, nrhs, M; 1482 PetscScalar *array; 1483 const PetscScalar *rbray; 1484 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1485 PetscMUMPSInt *isol_loc, *isol_loc_save; 1486 PetscScalar *bray, *sol_loc, *sol_loc_save; 1487 IS is_to, is_from; 1488 PetscInt k, proc, j, m, myrstart; 1489 const PetscInt *rstart; 1490 Vec v_mpi, msol_loc; 1491 VecScatter scat_sol; 1492 Vec b_seq; 1493 VecScatter scat_rhs; 1494 PetscScalar *aa; 1495 PetscInt spnr, *ia, *ja; 1496 Mat_MPIAIJ *b = NULL; 1497 1498 PetscFunctionBegin; 1499 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1500 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1501 1502 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1503 if (denseB) { 1504 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1505 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1506 } else { /* sparse B */ 1507 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1508 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1509 if (flgT) { /* input B is transpose of actual RHS matrix, 1510 because mumps requires sparse compressed COLUMN storage! 
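The CSR (row) structure of the stored matrix, obtained with MatGetRowIJ() below, is then exactly the compressed-column structure of the right-hand side that MUMPS reads through irhs_ptr/irhs_sparse.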
See MatMatTransposeSolve_MUMPS() */ 1511 PetscCall(MatTransposeGetMat(B, &Bt)); 1512 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1513 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1514 } 1515 1516 PetscCall(MatGetSize(B, &M, &nrhs)); 1517 mumps->id.nrhs = nrhs; 1518 mumps->id.lrhs = M; 1519 mumps->id.rhs = NULL; 1520 1521 if (mumps->petsc_size == 1) { 1522 PetscScalar *aa; 1523 PetscInt spnr, *ia, *ja; 1524 PetscBool second_solve = PETSC_FALSE; 1525 1526 PetscCall(MatDenseGetArray(X, &array)); 1527 mumps->id.rhs = (MumpsScalar *)array; 1528 1529 if (denseB) { 1530 /* copy B to X */ 1531 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1532 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1533 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1534 } else { /* sparse B */ 1535 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1536 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1537 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1538 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1539 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1540 } 1541 /* handle condensation step of Schur complement (if any) */ 1542 if (mumps->id.size_schur > 0) { 1543 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1544 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1545 second_solve = PETSC_TRUE; 1546 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1547 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1548 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1549 } 1550 /* solve phase */ 1551 mumps->id.job = JOB_SOLVE; 1552 PetscMUMPS_c(mumps); 1553 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1554 1555 /* handle expansion step of Schur complement (if any) */ 1556 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1557 else if (mumps->id.ICNTL(26) == 1) { 1558 PetscCall(MatMumpsSolveSchur_Private(A)); 1559 for (j = 0; j < nrhs; ++j) 1560 for (i = 0; i < mumps->id.size_schur; ++i) { 1561 #if !defined(PETSC_USE_COMPLEX) 1562 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1563 #else 1564 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1565 #endif 1566 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1567 } 1568 } 1569 if (!denseB) { /* sparse B */ 1570 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1571 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1572 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1573 } 1574 PetscCall(MatDenseRestoreArray(X, &array)); 1575 PetscFunctionReturn(PETSC_SUCCESS); 1576 } 1577 1578 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
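   For reference, a minimal caller-side sketch of how this routine is typically reached through the
   standard PETSc factorization interface (illustrative only: A is the system matrix, B and X are the
   caller's dense right-hand-side and solution matrices, and error checking with PetscCall() is omitted):

       KSP ksp;
       PC  pc;
       Mat F;                                            (the MUMPS factor held by the PC)
       KSPCreate(PETSC_COMM_WORLD, &ksp);
       KSPSetOperators(ksp, A, A);
       KSPSetType(ksp, KSPPREONLY);
       KSPGetPC(ksp, &pc);
       PCSetType(pc, PCLU);
       PCFactorSetMatSolverType(pc, MATSOLVERMUMPS);
       KSPSetUp(ksp);                                    (symbolic and numeric factorization)
       PCFactorGetMatrix(pc, &F);
       MatMatSolve(F, B, X);                             (dispatches here for a MUMPS factor)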
*/ 1579 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1580 1581 /* create msol_loc to hold mumps local solution */ 1582 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1583 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1584 1585 lsol_loc = mumps->id.lsol_loc; 1586 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1587 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1588 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1589 mumps->id.isol_loc = isol_loc; 1590 1591 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1592 1593 if (denseB) { 1594 if (mumps->ICNTL20 == 10) { 1595 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1596 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1597 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1598 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1599 PetscCall(MatGetLocalSize(B, &m, NULL)); 1600 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1601 } else { 1602 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1603 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1604 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1605 0, re-arrange B into desired order, which is a local operation. 1606 */ 1607 1608 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1609 /* wrap dense rhs matrix B into a vector v_mpi */ 1610 PetscCall(MatGetLocalSize(B, &m, NULL)); 1611 PetscCall(MatDenseGetArray(B, &bray)); 1612 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1613 PetscCall(MatDenseRestoreArray(B, &bray)); 1614 1615 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
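   As a small worked illustration of the reordering done below (numbers are only an example): with two
   ranks owning rows {0,1} and {2,3} of B and nrhs = 2, the vector v_mpi wrapping B's local arrays holds
   (0,0),(1,0),(0,1),(1,1) on rank 0 followed by (2,0),(3,0),(2,1),(3,1) on rank 1, whereas the centralized,
   column-major rhs wanted by MUMPS is (0,0),(1,0),(2,0),(3,0),(0,1),(1,1),(2,1),(3,1); the index array
   built on rank 0 is therefore idx = {0,1,4,5, 2,3,6,7}, i.e. entry k of v_mpi is scattered to b_seq[idx[k]].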
*/ 1616 if (!mumps->myid) { 1617 PetscInt *idx; 1618 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1619 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1620 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1621 k = 0; 1622 for (proc = 0; proc < mumps->petsc_size; proc++) { 1623 for (j = 0; j < nrhs; j++) { 1624 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1625 } 1626 } 1627 1628 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1629 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1630 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1631 } else { 1632 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1633 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1634 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1635 } 1636 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1637 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1638 PetscCall(ISDestroy(&is_to)); 1639 PetscCall(ISDestroy(&is_from)); 1640 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1641 1642 if (!mumps->myid) { /* define rhs on the host */ 1643 PetscCall(VecGetArray(b_seq, &bray)); 1644 mumps->id.rhs = (MumpsScalar *)bray; 1645 PetscCall(VecRestoreArray(b_seq, &bray)); 1646 } 1647 } 1648 } else { /* sparse B */ 1649 b = (Mat_MPIAIJ *)Bt->data; 1650 1651 /* wrap dense X into a vector v_mpi */ 1652 PetscCall(MatGetLocalSize(X, &m, NULL)); 1653 PetscCall(MatDenseGetArray(X, &bray)); 1654 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1655 PetscCall(MatDenseRestoreArray(X, &bray)); 1656 1657 if (!mumps->myid) { 1658 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1659 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1660 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1661 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1662 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1663 } else { 1664 mumps->id.irhs_ptr = NULL; 1665 mumps->id.irhs_sparse = NULL; 1666 mumps->id.nz_rhs = 0; 1667 mumps->id.rhs_sparse = NULL; 1668 } 1669 } 1670 1671 /* solve phase */ 1672 mumps->id.job = JOB_SOLVE; 1673 PetscMUMPS_c(mumps); 1674 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1675 1676 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1677 PetscCall(MatDenseGetArray(X, &array)); 1678 PetscCall(VecPlaceArray(v_mpi, array)); 1679 1680 /* create scatter scat_sol */ 1681 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1682 /* iidx: index for scatter mumps solution to petsc X */ 1683 1684 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1685 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1686 for (i = 0; i < lsol_loc; i++) { 1687 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1688 1689 for (proc = 0; proc < mumps->petsc_size; proc++) { 1690 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1691 myrstart = rstart[proc]; 1692 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1693 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1694 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1695 break; 1696 } 1697 } 1698 1699 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1700 } 1701 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1702 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1703 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1704 PetscCall(ISDestroy(&is_from)); 1705 PetscCall(ISDestroy(&is_to)); 1706 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1707 PetscCall(MatDenseRestoreArray(X, &array)); 1708 1709 /* free spaces */ 1710 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1711 mumps->id.isol_loc = isol_loc_save; 1712 1713 PetscCall(PetscFree2(sol_loc, isol_loc)); 1714 PetscCall(PetscFree(idxx)); 1715 PetscCall(VecDestroy(&msol_loc)); 1716 PetscCall(VecDestroy(&v_mpi)); 1717 if (!denseB) { 1718 if (!mumps->myid) { 1719 b = (Mat_MPIAIJ *)Bt->data; 1720 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1721 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1722 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1723 } 1724 } else { 1725 if (mumps->ICNTL20 == 0) { 1726 PetscCall(VecDestroy(&b_seq)); 1727 PetscCall(VecScatterDestroy(&scat_rhs)); 1728 } 1729 } 1730 PetscCall(VecScatterDestroy(&scat_sol)); 1731 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1732 PetscFunctionReturn(PETSC_SUCCESS); 1733 } 1734 1735 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1736 { 1737 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1738 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1739 1740 PetscFunctionBegin; 1741 mumps->id.ICNTL(9) = 0; 1742 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1743 mumps->id.ICNTL(9) = value; 1744 PetscFunctionReturn(PETSC_SUCCESS); 1745 } 1746 1747 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1748 { 1749 PetscBool flg; 1750 Mat B; 1751 1752 PetscFunctionBegin; 1753 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1754 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1755 1756 /* Create B=Bt^T that uses Bt's data structure */ 1757 PetscCall(MatCreateTranspose(Bt, &B)); 1758 1759 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1760 PetscCall(MatDestroy(&B)); 1761 PetscFunctionReturn(PETSC_SUCCESS); 1762 } 1763 1764 #if !defined(PETSC_USE_COMPLEX) 1765 /* 1766 input: 1767 F: numeric factor 1768 output: 1769 nneg: total number of negative pivots 1770 nzero: total number of zero pivots 1771 npos: (global dimension of F) - nneg - nzero 1772 */ 1773 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1774 { 1775 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1776 PetscMPIInt size; 1777 1778 PetscFunctionBegin; 1779 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1780 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1781 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1782 1783 if (nneg) *nneg = mumps->id.INFOG(12); 1784 if (nzero || npos) { 1785 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1786 if (nzero) *nzero = mumps->id.INFOG(28); 1787 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1788 } 1789 PetscFunctionReturn(PETSC_SUCCESS); 1790 } 1791 #endif 1792 1793 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1794 { 1795 PetscInt i, nreqs; 1796 PetscMUMPSInt *irn, *jcn; 1797 PetscMPIInt count; 1798 PetscInt64 totnnz, remain; 1799 const PetscInt osize = mumps->omp_comm_size; 1800 PetscScalar *val; 1801 1802 PetscFunctionBegin; 1803 if (osize > 1) { 1804 if (reuse == MAT_INITIAL_MATRIX) { 1805 /* master first gathers counts of nonzeros to receive */ 1806 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1807 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1808 1809 /* Then each computes number of send/recvs */ 1810 if (mumps->is_omp_master) { 1811 /* Start from 1 since self communication is not done in MPI */ 1812 nreqs = 0; 1813 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1814 } else { 1815 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1816 } 1817 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1818 1819 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1820 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1821 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1822 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
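   As a concrete illustration (numbers only an example): a rank contributing nnz = 5,000,000,000 triplets
   would ship them in three slices of 2,147,483,647, 2,147,483,647 and 705,032,706 entries, each of which
   fits in an int message count.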
1823 */ 1824 nreqs = 0; /* counter for actual send/recvs */ 1825 if (mumps->is_omp_master) { 1826 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1827 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1828 PetscCall(PetscMalloc1(totnnz, &val)); 1829 1830 /* Self communication */ 1831 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1832 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1833 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1834 1835 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1836 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1837 PetscCall(PetscFree(mumps->val_alloc)); 1838 mumps->nnz = totnnz; 1839 mumps->irn = irn; 1840 mumps->jcn = jcn; 1841 mumps->val = mumps->val_alloc = val; 1842 1843 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1844 jcn += mumps->recvcount[0]; 1845 val += mumps->recvcount[0]; 1846 1847 /* Remote communication */ 1848 for (i = 1; i < osize; i++) { 1849 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1850 remain = mumps->recvcount[i] - count; 1851 while (count > 0) { 1852 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1853 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1854 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1855 irn += count; 1856 jcn += count; 1857 val += count; 1858 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1859 remain -= count; 1860 } 1861 } 1862 } else { 1863 irn = mumps->irn; 1864 jcn = mumps->jcn; 1865 val = mumps->val; 1866 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1867 remain = mumps->nnz - count; 1868 while (count > 0) { 1869 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1870 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1871 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1872 irn += count; 1873 jcn += count; 1874 val += count; 1875 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1876 remain -= count; 1877 } 1878 } 1879 } else { 1880 nreqs = 0; 1881 if (mumps->is_omp_master) { 1882 val = mumps->val + mumps->recvcount[0]; 1883 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1884 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1885 remain = mumps->recvcount[i] - count; 1886 while (count > 0) { 1887 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1888 val += count; 1889 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1890 remain -= count; 1891 } 1892 } 1893 } else { 1894 val = mumps->val; 1895 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1896 remain = mumps->nnz - count; 1897 while (count > 0) { 1898 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1899 val += count; 1900 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1901 remain -= count; 1902 } 1903 } 1904 } 1905 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1906 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1907 } 1908 PetscFunctionReturn(PETSC_SUCCESS); 1909 } 1910 1911 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const 
MatFactorInfo *info) 1912 { 1913 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1914 PetscBool isMPIAIJ; 1915 1916 PetscFunctionBegin; 1917 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1918 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1919 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1920 PetscFunctionReturn(PETSC_SUCCESS); 1921 } 1922 1923 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1924 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1925 1926 /* numerical factorization phase */ 1927 mumps->id.job = JOB_FACTNUMERIC; 1928 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1929 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 1930 } else { 1931 mumps->id.a_loc = (MumpsScalar *)mumps->val; 1932 } 1933 PetscMUMPS_c(mumps); 1934 if (mumps->id.INFOG(1) < 0) { 1935 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 1936 if (mumps->id.INFOG(1) == -10) { 1937 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1938 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1939 } else if (mumps->id.INFOG(1) == -13) { 1940 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1941 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1942 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1943 PetscCall(PetscInfo(F, "MUMPS error in numerical factorizatione: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1944 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1945 } else { 1946 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1947 F->factorerrortype = MAT_FACTOR_OTHER; 1948 } 1949 } 1950 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 1951 1952 F->assembled = PETSC_TRUE; 1953 1954 if (F->schur) { /* reset Schur status to unfactored */ 1955 #if defined(PETSC_HAVE_CUDA) 1956 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1957 #endif 1958 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1959 mumps->id.ICNTL(19) = 2; 1960 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1961 } 1962 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1963 } 1964 1965 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1966 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1967 1968 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1969 if (mumps->petsc_size > 1) { 1970 PetscInt lsol_loc; 1971 PetscScalar *sol_loc; 1972 1973 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 1974 1975 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1976 if (mumps->x_seq) { 
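    /* a previous numeric factorization created these; INFO(23), the number of pivots eliminated on this rank
       and hence the length of the local solution returned by MUMPS, may have changed, so release the old
       storage and scatter and rebuild them below */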
1977 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1978 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1979 PetscCall(VecDestroy(&mumps->x_seq)); 1980 } 1981 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1982 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1983 mumps->id.lsol_loc = lsol_loc; 1984 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1985 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1986 } 1987 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1988 PetscFunctionReturn(PETSC_SUCCESS); 1989 } 1990 1991 /* Sets MUMPS options from the options database */ 1992 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1993 { 1994 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1995 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1996 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1997 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1998 MumpsScalar *arr; 1999 2000 PetscFunctionBegin; 2001 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2002 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2003 PetscInt nthreads = 0; 2004 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2005 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2006 2007 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2008 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2009 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2010 2011 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2012 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2013 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2014 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2015 if (mumps->use_petsc_omp_support) { 2016 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2017 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2018 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2019 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2020 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2021 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2022 #endif 2023 } else { 2024 mumps->omp_comm = PETSC_COMM_SELF; 2025 mumps->mumps_comm = mumps->petsc_comm; 2026 mumps->is_omp_master = PETSC_TRUE; 2027 } 2028 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2029 mumps->reqs = NULL; 2030 mumps->tag = 0; 2031 2032 if (mumps->mumps_comm != MPI_COMM_NULL) { 2033 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2034 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
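   (MPI_Comm_dup creates a communicator with its own communication context, so point-to-point traffic that
   MUMPS posts internally can never be matched against messages PETSc sends on the original communicator,
   regardless of the tag values either side uses.)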
*/ 2035 MPI_Comm comm; 2036 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2037 mumps->mumps_comm = comm; 2038 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2039 } 2040 2041 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2042 mumps->id.job = JOB_INIT; 2043 mumps->id.par = 1; /* host participates factorizaton and solve */ 2044 mumps->id.sym = mumps->sym; 2045 2046 size = mumps->id.size_schur; 2047 arr = mumps->id.schur; 2048 listvar_schur = mumps->id.listvar_schur; 2049 PetscMUMPS_c(mumps); 2050 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2051 /* restore cached ICNTL and CNTL values */ 2052 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2053 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2054 PetscCall(PetscFree(mumps->ICNTL_pre)); 2055 PetscCall(PetscFree(mumps->CNTL_pre)); 2056 2057 if (schur) { 2058 mumps->id.size_schur = size; 2059 mumps->id.schur_lld = size; 2060 mumps->id.schur = arr; 2061 mumps->id.listvar_schur = listvar_schur; 2062 if (mumps->petsc_size > 1) { 2063 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2064 2065 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2066 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2067 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2068 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2069 } else { 2070 if (F->factortype == MAT_FACTOR_LU) { 2071 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2072 } else { 2073 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2074 } 2075 } 2076 mumps->id.ICNTL(26) = -1; 2077 } 2078 2079 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2080 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
2081 */ 2082 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2083 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2084 2085 mumps->scat_rhs = NULL; 2086 mumps->scat_sol = NULL; 2087 2088 /* set PETSc-MUMPS default options - override MUMPS default */ 2089 mumps->id.ICNTL(3) = 0; 2090 mumps->id.ICNTL(4) = 0; 2091 if (mumps->petsc_size == 1) { 2092 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2093 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2094 } else { 2095 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2096 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2097 } 2098 } 2099 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2100 if (flg) mumps->id.ICNTL(1) = icntl; 2101 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2102 if (flg) mumps->id.ICNTL(2) = icntl; 2103 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2104 if (flg) mumps->id.ICNTL(3) = icntl; 2105 2106 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2107 if (flg) mumps->id.ICNTL(4) = icntl; 2108 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2109 2110 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2111 if (flg) mumps->id.ICNTL(6) = icntl; 2112 2113 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2114 if (flg) { 2115 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2116 mumps->id.ICNTL(7) = icntl; 2117 } 2118 2119 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2120 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2121 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2122 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2123 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2124 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2125 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2126 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2127 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2128 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2129 if (flg) { 2130 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2131 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2132 } 2133 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2134 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2135 PetscCall(MatDestroy(&F->schur)); 2136 PetscCall(MatMumpsResetSchur_Private(mumps)); 2137 } 2138 2139 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2140 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2141 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2142 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2143 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2144 In short, we could not use distributed RHS with MPICH until v4.0b1. 
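   As an illustration, the behaviour can still be chosen at run time through the option registered below
   (executable name hypothetical):
       mpiexec -n 4 ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_20 0
   forces the centralized dense RHS path, while -mat_mumps_icntl_20 10 requests distributed dense right-hand
   sides where the MUMPS and MPI versions allow it.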
2145 */ 2146 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2147 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 2148 #else 2149 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 2150 #endif 2151 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2152 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2153 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2154 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2155 #endif 2156 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2157 2158 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2159 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2160 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2161 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2162 2163 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2164 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2165 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2166 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2167 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2168 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2169 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2170 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2171 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", 
mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2172 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2173 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2174 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2175 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2176 2177 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2178 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2179 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2180 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2181 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2182 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2183 2184 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2185 2186 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2187 if (ninfo) { 2188 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2189 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2190 mumps->ninfo = ninfo; 2191 for (i = 0; i < ninfo; i++) { 2192 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2193 mumps->info[i] = info[i]; 2194 } 2195 } 2196 PetscOptionsEnd(); 2197 PetscFunctionReturn(PETSC_SUCCESS); 2198 } 2199 2200 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2201 { 2202 PetscFunctionBegin; 2203 if (mumps->id.INFOG(1) < 0) { 2204 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2205 if (mumps->id.INFOG(1) == -6) { 2206 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2207 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2208 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2209 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2210 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2211 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2212 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2213 } else { 2214 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, 
INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2215 F->factorerrortype = MAT_FACTOR_OTHER; 2216 } 2217 } 2218 PetscFunctionReturn(PETSC_SUCCESS); 2219 } 2220 2221 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2222 { 2223 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2224 Vec b; 2225 const PetscInt M = A->rmap->N; 2226 2227 PetscFunctionBegin; 2228 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2229 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2230 PetscFunctionReturn(PETSC_SUCCESS); 2231 } 2232 2233 /* Set MUMPS options from the options database */ 2234 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2235 2236 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2237 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2238 2239 /* analysis phase */ 2240 mumps->id.job = JOB_FACTSYMBOLIC; 2241 mumps->id.n = M; 2242 switch (mumps->id.ICNTL(18)) { 2243 case 0: /* centralized assembled matrix input */ 2244 if (!mumps->myid) { 2245 mumps->id.nnz = mumps->nnz; 2246 mumps->id.irn = mumps->irn; 2247 mumps->id.jcn = mumps->jcn; 2248 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2249 if (r) { 2250 mumps->id.ICNTL(7) = 1; 2251 if (!mumps->myid) { 2252 const PetscInt *idx; 2253 PetscInt i; 2254 2255 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2256 PetscCall(ISGetIndices(r, &idx)); 2257 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2258 PetscCall(ISRestoreIndices(r, &idx)); 2259 } 2260 } 2261 } 2262 break; 2263 case 3: /* distributed assembled matrix input (size>1) */ 2264 mumps->id.nnz_loc = mumps->nnz; 2265 mumps->id.irn_loc = mumps->irn; 2266 mumps->id.jcn_loc = mumps->jcn; 2267 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2268 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
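   This path is taken with a pre-5.3.0 MUMPS, with the MPICH versions discussed in MatSetFromOptions_MUMPS,
   or when the user explicitly selects -mat_mumps_icntl_20 0.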
Create scatter scat_rhs for repeated use in MatSolve() */ 2269 PetscCall(MatCreateVecs(A, NULL, &b)); 2270 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2271 PetscCall(VecDestroy(&b)); 2272 } 2273 break; 2274 } 2275 PetscMUMPS_c(mumps); 2276 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2277 2278 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2279 F->ops->solve = MatSolve_MUMPS; 2280 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2281 F->ops->matsolve = MatMatSolve_MUMPS; 2282 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2283 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2284 2285 mumps->matstruc = SAME_NONZERO_PATTERN; 2286 PetscFunctionReturn(PETSC_SUCCESS); 2287 } 2288 2289 /* Note the Petsc r and c permutations are ignored */ 2290 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2291 { 2292 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2293 Vec b; 2294 const PetscInt M = A->rmap->N; 2295 2296 PetscFunctionBegin; 2297 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2298 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2299 PetscFunctionReturn(PETSC_SUCCESS); 2300 } 2301 2302 /* Set MUMPS options from the options database */ 2303 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2304 2305 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2306 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2307 2308 /* analysis phase */ 2309 mumps->id.job = JOB_FACTSYMBOLIC; 2310 mumps->id.n = M; 2311 switch (mumps->id.ICNTL(18)) { 2312 case 0: /* centralized assembled matrix input */ 2313 if (!mumps->myid) { 2314 mumps->id.nnz = mumps->nnz; 2315 mumps->id.irn = mumps->irn; 2316 mumps->id.jcn = mumps->jcn; 2317 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2318 } 2319 break; 2320 case 3: /* distributed assembled matrix input (size>1) */ 2321 mumps->id.nnz_loc = mumps->nnz; 2322 mumps->id.irn_loc = mumps->irn; 2323 mumps->id.jcn_loc = mumps->jcn; 2324 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2325 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2326 PetscCall(MatCreateVecs(A, NULL, &b)); 2327 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2328 PetscCall(VecDestroy(&b)); 2329 } 2330 break; 2331 } 2332 PetscMUMPS_c(mumps); 2333 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2334 2335 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2336 F->ops->solve = MatSolve_MUMPS; 2337 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2338 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2339 2340 mumps->matstruc = SAME_NONZERO_PATTERN; 2341 PetscFunctionReturn(PETSC_SUCCESS); 2342 } 2343 2344 /* Note the Petsc r permutation and factor info are ignored */ 2345 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2346 { 2347 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2348 Vec b; 2349 const PetscInt M = A->rmap->N; 2350 2351 PetscFunctionBegin; 2352 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2353 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2354 PetscFunctionReturn(PETSC_SUCCESS); 2355 } 2356 2357 /* Set MUMPS options from the options database */ 2358 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2359 2360 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2361 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2362 2363 /* analysis phase */ 2364 mumps->id.job = JOB_FACTSYMBOLIC; 2365 mumps->id.n = M; 2366 switch (mumps->id.ICNTL(18)) { 2367 case 0: /* centralized assembled matrix input */ 2368 if (!mumps->myid) { 2369 mumps->id.nnz = mumps->nnz; 2370 mumps->id.irn = mumps->irn; 2371 mumps->id.jcn = mumps->jcn; 2372 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2373 } 2374 break; 2375 case 3: /* distributed assembled matrix input (size>1) */ 2376 mumps->id.nnz_loc = mumps->nnz; 2377 mumps->id.irn_loc = mumps->irn; 2378 mumps->id.jcn_loc = mumps->jcn; 2379 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2380 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2381 PetscCall(MatCreateVecs(A, NULL, &b)); 2382 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2383 PetscCall(VecDestroy(&b)); 2384 } 2385 break; 2386 } 2387 PetscMUMPS_c(mumps); 2388 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2389 2390 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2391 F->ops->solve = MatSolve_MUMPS; 2392 F->ops->solvetranspose = MatSolve_MUMPS; 2393 F->ops->matsolve = MatMatSolve_MUMPS; 2394 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2395 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2396 #if defined(PETSC_USE_COMPLEX) 2397 F->ops->getinertia = NULL; 2398 #else 2399 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2400 #endif 2401 2402 mumps->matstruc = SAME_NONZERO_PATTERN; 2403 PetscFunctionReturn(PETSC_SUCCESS); 2404 } 2405 2406 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2407 { 2408 PetscBool iascii; 2409 PetscViewerFormat format; 2410 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2411 2412 PetscFunctionBegin; 2413 /* check if matrix is mumps type */ 2414 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2415 2416 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2417 if (iascii) { 2418 PetscCall(PetscViewerGetFormat(viewer, &format)); 2419 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2420 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2421 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2422 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2423 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2424 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2425 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2426 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2427 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2428 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2429 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2430 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2431 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2432 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2433 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2434 if (mumps->id.ICNTL(11) > 0) { 2435 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2436 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2437 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2438 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2439 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2440 
PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2441 } 2442 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2443 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2444 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2445 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2446 /* ICNTL(15-17) not used */ 2447 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2448 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2449 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2450 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2451 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2452 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2453 2454 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2455 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2456 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2457 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2458 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2459 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2460 2461 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2462 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2463 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2464 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2465 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2466 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2467 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2468 2469 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2470 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2471 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2472 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2473 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", 
mumps->id.CNTL(5))); 2474 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2475 2476 /* information local to each processor */ 2477 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2478 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2479 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2480 PetscCall(PetscViewerFlush(viewer)); 2481 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2482 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2483 PetscCall(PetscViewerFlush(viewer)); 2484 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2485 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2486 PetscCall(PetscViewerFlush(viewer)); 2487 2488 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2489 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2490 PetscCall(PetscViewerFlush(viewer)); 2491 2492 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2493 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2494 PetscCall(PetscViewerFlush(viewer)); 2495 2496 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2497 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2498 PetscCall(PetscViewerFlush(viewer)); 2499 2500 if (mumps->ninfo && mumps->ninfo <= 80) { 2501 PetscInt i; 2502 for (i = 0; i < mumps->ninfo; i++) { 2503 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2504 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2505 PetscCall(PetscViewerFlush(viewer)); 2506 } 2507 } 2508 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2509 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2510 2511 if (mumps->myid == 0) { /* information from the host */ 2512 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2513 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2514 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2515 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2516 2517 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2518 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2519 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2520 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2521 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2522 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2523 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2524 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2525 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2526 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2527 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2528 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2529 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2530 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2531 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2532 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2533 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2534 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 
2535 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2536 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2537 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2538 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2539 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2540 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2541 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2542 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2543 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2544 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2545 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2546 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2547 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2548 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2549 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2550 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2551 } 2552 } 2553 } 2554 PetscFunctionReturn(PETSC_SUCCESS); 2555 } 2556 2557 static PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2558 { 2559 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2560 2561 PetscFunctionBegin; 2562 info->block_size = 1.0; 2563 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2564 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2565 info->nz_unneeded = 0.0; 2566 info->assemblies = 0.0; 2567 info->mallocs = 0.0; 2568 info->memory = 0.0; 2569 info->fill_ratio_given = 0; 2570 info->fill_ratio_needed = 0; 2571 info->factor_mallocs = 0; 2572 PetscFunctionReturn(PETSC_SUCCESS); 2573 } 2574 2575 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2576 { 2577 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2578 const PetscScalar *arr; 2579 const PetscInt *idxs; 2580 PetscInt size, i; 2581 2582 PetscFunctionBegin; 2583 PetscCall(ISGetLocalSize(is, &size)); 2584 /* Schur complement matrix */ 2585 PetscCall(MatDestroy(&F->schur)); 2586 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2587 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2588 mumps->id.schur = (MumpsScalar *)arr; 2589 mumps->id.size_schur = size; 2590 mumps->id.schur_lld = size; 2591 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2592 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2593 2594 /* MUMPS expects Fortran style indices */ 2595 PetscCall(PetscFree(mumps->id.listvar_schur)); 2596 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2597 PetscCall(ISGetIndices(is, &idxs)); 2598 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2599 PetscCall(ISRestoreIndices(is, &idxs)); 2600 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 2601 mumps->id.ICNTL(26) = -1; 2602 PetscFunctionReturn(PETSC_SUCCESS); 2603 } 2604 2605 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2606 { 2607 Mat St; 2608 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2609 PetscScalar *array; 2610 2611 PetscFunctionBegin; 2612 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! 
Call MatFactorSetSchurIS() to enable it"); 2613 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2614 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2615 PetscCall(MatSetType(St, MATDENSE)); 2616 PetscCall(MatSetUp(St)); 2617 PetscCall(MatDenseGetArray(St, &array)); 2618 if (!mumps->sym) { /* MUMPS always returns a full matrix */ 2619 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2620 PetscInt i, j, N = mumps->id.size_schur; 2621 for (i = 0; i < N; i++) { 2622 for (j = 0; j < N; j++) { 2623 #if !defined(PETSC_USE_COMPLEX) 2624 PetscScalar val = mumps->id.schur[i * N + j]; 2625 #else 2626 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2627 #endif 2628 array[j * N + i] = val; 2629 } 2630 } 2631 } else { /* stored by columns */ 2632 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2633 } 2634 } else { /* either full or lower-triangular (not packed) */ 2635 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2636 PetscInt i, j, N = mumps->id.size_schur; 2637 for (i = 0; i < N; i++) { 2638 for (j = i; j < N; j++) { 2639 #if !defined(PETSC_USE_COMPLEX) 2640 PetscScalar val = mumps->id.schur[i * N + j]; 2641 #else 2642 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2643 #endif 2644 array[i * N + j] = array[j * N + i] = val; 2645 } 2646 } 2647 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2648 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2649 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2650 PetscInt i, j, N = mumps->id.size_schur; 2651 for (i = 0; i < N; i++) { 2652 for (j = 0; j < i + 1; j++) { 2653 #if !defined(PETSC_USE_COMPLEX) 2654 PetscScalar val = mumps->id.schur[i * N + j]; 2655 #else 2656 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2657 #endif 2658 array[i * N + j] = array[j * N + i] = val; 2659 } 2660 } 2661 } 2662 } 2663 PetscCall(MatDenseRestoreArray(St, &array)); 2664 *S = St; 2665 PetscFunctionReturn(PETSC_SUCCESS); 2666 } 2667 2668 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2669 { 2670 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2671 2672 PetscFunctionBegin; 2673 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2674 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2675 for (i = 0; i < nICNTL_pre; ++i) 2676 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2677 if (i == nICNTL_pre) { /* not already cached */ 2678 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2679 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2680 mumps->ICNTL_pre[0]++; 2681 } 2682 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2683 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2684 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2685 PetscFunctionReturn(PETSC_SUCCESS); 2686 } 2687 2688 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2689 { 2690 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2691 2692 PetscFunctionBegin; 2693 if (mumps->id.job == JOB_NULL) { 2694 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 2695 *ival = 0; 2696 for (i = 0; i < nICNTL_pre; ++i) { 2697 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2698 } 2699 } else *ival = mumps->id.ICNTL(icntl); 2700 PetscFunctionReturn(PETSC_SUCCESS); 2701 } 2702 2703 /*@ 2704 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2705 2706 Logically Collective 2707 2708 Input Parameters: 2709 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2710 . icntl - index of MUMPS parameter array ICNTL() 2711 - ival - value of MUMPS ICNTL(icntl) 2712 2713 Options Database Key: 2714 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2715 2716 Level: beginner 2717 2718 References: 2719 . * - MUMPS Users' Guide 2720 2721 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2722 @*/ 2723 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2724 { 2725 PetscFunctionBegin; 2726 PetscValidType(F, 1); 2727 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2728 PetscValidLogicalCollectiveInt(F, icntl, 2); 2729 PetscValidLogicalCollectiveInt(F, ival, 3); 2730 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2731 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2732 PetscFunctionReturn(PETSC_SUCCESS); 2733 } 2734 2735 /*@ 2736 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2737 2738 Logically Collective 2739 2740 Input Parameters: 2741 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2742 - icntl - index of MUMPS parameter array ICNTL() 2743 2744 Output Parameter: 2745 . ival - value of MUMPS ICNTL(icntl) 2746 2747 Level: beginner 2748 2749 References: 2750 . * - MUMPS Users' Guide 2751 2752 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2753 @*/ 2754 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2755 { 2756 PetscFunctionBegin; 2757 PetscValidType(F, 1); 2758 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2759 PetscValidLogicalCollectiveInt(F, icntl, 2); 2760 PetscAssertPointer(ival, 3); 2761 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2762 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2763 PetscFunctionReturn(PETSC_SUCCESS); 2764 } 2765 2766 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2767 { 2768 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2769 2770 PetscFunctionBegin; 2771 if (mumps->id.job == JOB_NULL) { 2772 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 2773 for (i = 0; i < nCNTL_pre; ++i) 2774 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2775 if (i == nCNTL_pre) { 2776 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2777 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2778 mumps->CNTL_pre[0]++; 2779 } 2780 mumps->CNTL_pre[1 + 2 * i] = icntl; 2781 mumps->CNTL_pre[2 + 2 * i] = val; 2782 } else mumps->id.CNTL(icntl) = val; 2783 PetscFunctionReturn(PETSC_SUCCESS); 2784 } 2785 2786 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2787 { 2788 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2789 2790 PetscFunctionBegin; 2791 if (mumps->id.job == JOB_NULL) { 2792 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2793 *val = 0.0; 2794 for (i = 0; i < nCNTL_pre; ++i) { 2795 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2796 } 2797 } else *val = mumps->id.CNTL(icntl); 2798 PetscFunctionReturn(PETSC_SUCCESS); 2799 } 2800 2801 /*@ 2802 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2803 2804 Logically Collective 2805 2806 Input Parameters: 2807 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2808 . icntl - index of MUMPS parameter array CNTL() 2809 - val - value of MUMPS CNTL(icntl) 2810 2811 Options Database Key: 2812 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2813 2814 Level: beginner 2815 2816 References: 2817 . * - MUMPS Users' Guide 2818 2819 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2820 @*/ 2821 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2822 { 2823 PetscFunctionBegin; 2824 PetscValidType(F, 1); 2825 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2826 PetscValidLogicalCollectiveInt(F, icntl, 2); 2827 PetscValidLogicalCollectiveReal(F, val, 3); 2828 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2829 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2830 PetscFunctionReturn(PETSC_SUCCESS); 2831 } 2832 2833 /*@ 2834 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2835 2836 Logically Collective 2837 2838 Input Parameters: 2839 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2840 - icntl - index of MUMPS parameter array CNTL() 2841 2842 Output Parameter: 2843 . val - value of MUMPS CNTL(icntl) 2844 2845 Level: beginner 2846 2847 References: 2848 . 
* - MUMPS Users' Guide 2849 2850 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2851 @*/ 2852 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2853 { 2854 PetscFunctionBegin; 2855 PetscValidType(F, 1); 2856 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2857 PetscValidLogicalCollectiveInt(F, icntl, 2); 2858 PetscAssertPointer(val, 3); 2859 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2860 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2861 PetscFunctionReturn(PETSC_SUCCESS); 2862 } 2863 2864 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2865 { 2866 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2867 2868 PetscFunctionBegin; 2869 *info = mumps->id.INFO(icntl); 2870 PetscFunctionReturn(PETSC_SUCCESS); 2871 } 2872 2873 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2874 { 2875 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2876 2877 PetscFunctionBegin; 2878 *infog = mumps->id.INFOG(icntl); 2879 PetscFunctionReturn(PETSC_SUCCESS); 2880 } 2881 2882 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2883 { 2884 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2885 2886 PetscFunctionBegin; 2887 *rinfo = mumps->id.RINFO(icntl); 2888 PetscFunctionReturn(PETSC_SUCCESS); 2889 } 2890 2891 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2892 { 2893 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2894 2895 PetscFunctionBegin; 2896 *rinfog = mumps->id.RINFOG(icntl); 2897 PetscFunctionReturn(PETSC_SUCCESS); 2898 } 2899 2900 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2901 { 2902 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2903 2904 PetscFunctionBegin; 2905 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2906 *size = 0; 2907 *array = NULL; 2908 if (!mumps->myid) { 2909 *size = mumps->id.INFOG(28); 2910 PetscCall(PetscMalloc1(*size, array)); 2911 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2912 } 2913 PetscFunctionReturn(PETSC_SUCCESS); 2914 } 2915 2916 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2917 { 2918 Mat Bt = NULL, Btseq = NULL; 2919 PetscBool flg; 2920 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2921 PetscScalar *aa; 2922 PetscInt spnr, *ia, *ja, M, nrhs; 2923 2924 PetscFunctionBegin; 2925 PetscAssertPointer(spRHS, 2); 2926 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2927 if (flg) { 2928 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2929 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2930 2931 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2932 2933 if (mumps->petsc_size > 1) { 2934 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2935 Btseq = b->A; 2936 } else { 2937 Btseq = Bt; 2938 } 2939 2940 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2941 mumps->id.nrhs = nrhs; 2942 mumps->id.lrhs = M; 2943 mumps->id.rhs = NULL; 2944 2945 if (!mumps->myid) { 2946 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2947 
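/* Descriptive note (added for clarity): MUMPS expects the sparse right-hand sides in 1-based compressed sparse column (CSC) form via irhs_ptr/irhs_sparse/rhs_sparse. Since Btseq stores the transpose of spRHS in CSR (AIJ) format, its row pointers and column indices, obtained below from MatGetRowIJ() with a shift of 1, serve directly as the CSC arrays of spRHS */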
PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2948 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2949 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2950 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2951 } else { 2952 mumps->id.irhs_ptr = NULL; 2953 mumps->id.irhs_sparse = NULL; 2954 mumps->id.nz_rhs = 0; 2955 mumps->id.rhs_sparse = NULL; 2956 } 2957 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2958 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2959 2960 /* solve phase */ 2961 mumps->id.job = JOB_SOLVE; 2962 PetscMUMPS_c(mumps); 2963 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2964 2965 if (!mumps->myid) { 2966 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2967 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2968 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2969 } 2970 PetscFunctionReturn(PETSC_SUCCESS); 2971 } 2972 2973 /*@ 2974 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2975 2976 Logically Collective 2977 2978 Input Parameter: 2979 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2980 2981 Output Parameter: 2982 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2983 2984 Level: beginner 2985 2986 References: 2987 . * - MUMPS Users' Guide 2988 2989 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2990 @*/ 2991 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2992 { 2993 PetscFunctionBegin; 2994 PetscValidType(F, 1); 2995 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2996 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2997 PetscFunctionReturn(PETSC_SUCCESS); 2998 } 2999 3000 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3001 { 3002 Mat spRHS; 3003 3004 PetscFunctionBegin; 3005 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3006 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3007 PetscCall(MatDestroy(&spRHS)); 3008 PetscFunctionReturn(PETSC_SUCCESS); 3009 } 3010 3011 /*@ 3012 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 3013 3014 Logically Collective 3015 3016 Input Parameter: 3017 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3018 3019 Output Parameter: 3020 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3021 3022 Level: beginner 3023 3024 References: 3025 . 
* - MUMPS Users' Guide 3026 3027 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3028 @*/ 3029 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3030 { 3031 PetscBool flg; 3032 3033 PetscFunctionBegin; 3034 PetscValidType(F, 1); 3035 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3036 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3037 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3038 3039 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3040 PetscFunctionReturn(PETSC_SUCCESS); 3041 } 3042 3043 /*@ 3044 MatMumpsGetInfo - Get MUMPS parameter INFO() 3045 3046 Logically Collective 3047 3048 Input Parameters: 3049 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3050 - icntl - index of MUMPS parameter array INFO() 3051 3052 Output Parameter: 3053 . ival - value of MUMPS INFO(icntl) 3054 3055 Level: beginner 3056 3057 References: 3058 . * - MUMPS Users' Guide 3059 3060 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3061 @*/ 3062 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3063 { 3064 PetscFunctionBegin; 3065 PetscValidType(F, 1); 3066 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3067 PetscAssertPointer(ival, 3); 3068 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3069 PetscFunctionReturn(PETSC_SUCCESS); 3070 } 3071 3072 /*@ 3073 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3074 3075 Logically Collective 3076 3077 Input Parameters: 3078 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3079 - icntl - index of MUMPS parameter array INFOG() 3080 3081 Output Parameter: 3082 . ival - value of MUMPS INFOG(icntl) 3083 3084 Level: beginner 3085 3086 References: 3087 . * - MUMPS Users' Guide 3088 3089 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3090 @*/ 3091 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3092 { 3093 PetscFunctionBegin; 3094 PetscValidType(F, 1); 3095 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3096 PetscAssertPointer(ival, 3); 3097 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3098 PetscFunctionReturn(PETSC_SUCCESS); 3099 } 3100 3101 /*@ 3102 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3103 3104 Logically Collective 3105 3106 Input Parameters: 3107 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3108 - icntl - index of MUMPS parameter array RINFO() 3109 3110 Output Parameter: 3111 . val - value of MUMPS RINFO(icntl) 3112 3113 Level: beginner 3114 3115 References: 3116 . 
* - MUMPS Users' Guide 3117 3118 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3119 @*/ 3120 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3121 { 3122 PetscFunctionBegin; 3123 PetscValidType(F, 1); 3124 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3125 PetscAssertPointer(val, 3); 3126 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3127 PetscFunctionReturn(PETSC_SUCCESS); 3128 } 3129 3130 /*@ 3131 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3132 3133 Logically Collective 3134 3135 Input Parameters: 3136 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3137 - icntl - index of MUMPS parameter array RINFOG() 3138 3139 Output Parameter: 3140 . val - value of MUMPS RINFOG(icntl) 3141 3142 Level: beginner 3143 3144 References: 3145 . * - MUMPS Users' Guide 3146 3147 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3148 @*/ 3149 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3150 { 3151 PetscFunctionBegin; 3152 PetscValidType(F, 1); 3153 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3154 PetscAssertPointer(val, 3); 3155 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3156 PetscFunctionReturn(PETSC_SUCCESS); 3157 } 3158 3159 /*@ 3160 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3161 3162 Logically Collective 3163 3164 Input Parameter: 3165 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3166 3167 Output Parameters: 3168 + size - local size of the array. The size of the array is non-zero only on the host. 3169 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3170 for freeing this array. 3171 3172 Level: beginner 3173 3174 References: 3175 . * - MUMPS Users' Guide 3176 3177 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3178 @*/ 3179 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3180 { 3181 PetscFunctionBegin; 3182 PetscValidType(F, 1); 3183 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3184 PetscAssertPointer(size, 2); 3185 PetscAssertPointer(array, 3); 3186 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3187 PetscFunctionReturn(PETSC_SUCCESS); 3188 } 3189 3190 /*MC 3191 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3192 distributed and sequential matrices via the external package MUMPS. 
3193 3194 Works with `MATAIJ` and `MATSBAIJ` matrices 3195 3196 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3197 3198 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3199 See details below. 3200 3201 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3202 3203 Options Database Keys: 3204 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3205 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 3206 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3207 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3208 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 3209 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3210 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3211 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3212 . -mat_mumps_icntl_10 - ICNTL(10): max number of refinements 3213 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3214 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3215 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3216 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3217 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3218 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3219 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3220 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3221 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor 3222 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3223 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3224 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 3225 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 3226 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3227 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3228 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3229 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3230 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3231 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3232 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3233 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3234 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3235 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3236 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3237 . 
-mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3238 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3239 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3240 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3241 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3242 3243 Level: beginner 3244 3245 Notes: 3246 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 3247 error if the matrix is Hermitian. 3248 3249 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3250 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3251 3252 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3253 the failure with 3254 .vb 3255 KSPGetPC(ksp,&pc); 3256 PCFactorGetMatrix(pc,&mat); 3257 MatMumpsGetInfo(mat,....); 3258 MatMumpsGetInfog(mat,....); etc. 3259 .ve 3260 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3261 3262 MUMPS provides 64-bit integer support in two build modes: 3263 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3264 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3265 3266 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3267 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3268 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3269 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3270 3271 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3272 3273 Two modes to run MUMPS/PETSc with OpenMP 3274 .vb 3275 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3276 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3277 .ve 3278 3279 .vb 3280 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3281 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3282 .ve 3283 3284 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3285 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3286 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). 
Since MUMPS calls BLAS 3287 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3288 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 3289 3290 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3291 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3292 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3293 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We will free CPUs assigned to slaves (which might be set 3294 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3295 In a multi-socket compute node, MPI rank mapping is an issue. Using the above example again, suppose your compute node has two sockets, 3296 and you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind 3297 MPI ranks to cores; then with -mat_mumps_use_omp_threads 16, a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the 3298 cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3299 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3300 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3301 examine the mapping result. 3302 3303 PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3304 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3305 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3306 3307 References: 3308 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3309 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 
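   Example Usage:
   A minimal sketch of selecting MUMPS programmatically inside a `KSP` solve (assuming an already assembled matrix A, vectors b and x, and declared ksp/pc objects); the same effect can be obtained purely from the options database as described above
.vb
      KSPCreate(PETSC_COMM_WORLD, &ksp);
      KSPSetOperators(ksp, A, A);
      KSPSetType(ksp, KSPPREONLY);
      KSPGetPC(ksp, &pc);
      PCSetType(pc, PCLU);
      PCFactorSetMatSolverType(pc, MATSOLVERMUMPS);
      KSPSetFromOptions(ksp);  /* picks up -mat_mumps_icntl_xx / -mat_mumps_cntl_xx options from the database */
      KSPSolve(ksp, b, x);
.ve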
3310 3311 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3312 M*/ 3313 3314 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3315 { 3316 PetscFunctionBegin; 3317 *type = MATSOLVERMUMPS; 3318 PetscFunctionReturn(PETSC_SUCCESS); 3319 } 3320 3321 /* MatGetFactor for Seq and MPI AIJ matrices */ 3322 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3323 { 3324 Mat B; 3325 Mat_MUMPS *mumps; 3326 PetscBool isSeqAIJ; 3327 PetscMPIInt size; 3328 3329 PetscFunctionBegin; 3330 #if defined(PETSC_USE_COMPLEX) 3331 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3332 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3333 *F = NULL; 3334 PetscFunctionReturn(PETSC_SUCCESS); 3335 } 3336 #endif 3337 /* Create the factorization matrix */ 3338 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3339 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3340 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3341 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3342 PetscCall(MatSetUp(B)); 3343 3344 PetscCall(PetscNew(&mumps)); 3345 3346 B->ops->view = MatView_MUMPS; 3347 B->ops->getinfo = MatGetInfo_MUMPS; 3348 3349 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3350 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3351 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3352 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3353 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3354 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3355 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3356 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3357 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3358 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3359 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3360 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3361 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3362 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3363 3364 if (ftype == MAT_FACTOR_LU) { 3365 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3366 B->factortype = MAT_FACTOR_LU; 3367 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3368 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3369 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 
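/* MUMPS SYM parameter set below: 0 = unsymmetric (LU), 1 = symmetric positive definite, 2 = general symmetric (Cholesky) */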
3370 mumps->sym = 0; 3371 } else { 3372 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3373 B->factortype = MAT_FACTOR_CHOLESKY; 3374 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3375 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3376 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3377 #if defined(PETSC_USE_COMPLEX) 3378 mumps->sym = 2; 3379 #else 3380 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3381 else mumps->sym = 2; 3382 #endif 3383 } 3384 3385 /* set solvertype */ 3386 PetscCall(PetscFree(B->solvertype)); 3387 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3388 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3389 if (size == 1) { 3390 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3391 B->canuseordering = PETSC_TRUE; 3392 } 3393 B->ops->destroy = MatDestroy_MUMPS; 3394 B->data = (void *)mumps; 3395 3396 *F = B; 3397 mumps->id.job = JOB_NULL; 3398 mumps->ICNTL_pre = NULL; 3399 mumps->CNTL_pre = NULL; 3400 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3401 PetscFunctionReturn(PETSC_SUCCESS); 3402 } 3403 3404 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3405 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3406 { 3407 Mat B; 3408 Mat_MUMPS *mumps; 3409 PetscBool isSeqSBAIJ; 3410 PetscMPIInt size; 3411 3412 PetscFunctionBegin; 3413 #if defined(PETSC_USE_COMPLEX) 3414 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3415 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3416 *F = NULL; 3417 PetscFunctionReturn(PETSC_SUCCESS); 3418 } 3419 #endif 3420 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3421 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3422 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3423 PetscCall(MatSetUp(B)); 3424 3425 PetscCall(PetscNew(&mumps)); 3426 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3427 if (isSeqSBAIJ) { 3428 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3429 } else { 3430 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3431 } 3432 3433 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3434 B->ops->view = MatView_MUMPS; 3435 B->ops->getinfo = MatGetInfo_MUMPS; 3436 3437 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3438 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3439 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3440 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3441 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3442 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3443 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3444 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3445 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", 
MatMumpsGetInfog_MUMPS)); 3446 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3447 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3448 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3449 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3450 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3451 3452 B->factortype = MAT_FACTOR_CHOLESKY; 3453 #if defined(PETSC_USE_COMPLEX) 3454 mumps->sym = 2; 3455 #else 3456 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3457 else mumps->sym = 2; 3458 #endif 3459 3460 /* set solvertype */ 3461 PetscCall(PetscFree(B->solvertype)); 3462 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3463 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3464 if (size == 1) { 3465 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3466 B->canuseordering = PETSC_TRUE; 3467 } 3468 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3469 B->ops->destroy = MatDestroy_MUMPS; 3470 B->data = (void *)mumps; 3471 3472 *F = B; 3473 mumps->id.job = JOB_NULL; 3474 mumps->ICNTL_pre = NULL; 3475 mumps->CNTL_pre = NULL; 3476 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3477 PetscFunctionReturn(PETSC_SUCCESS); 3478 } 3479 3480 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3481 { 3482 Mat B; 3483 Mat_MUMPS *mumps; 3484 PetscBool isSeqBAIJ; 3485 PetscMPIInt size; 3486 3487 PetscFunctionBegin; 3488 /* Create the factorization matrix */ 3489 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3490 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3491 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3492 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3493 PetscCall(MatSetUp(B)); 3494 3495 PetscCall(PetscNew(&mumps)); 3496 if (ftype == MAT_FACTOR_LU) { 3497 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3498 B->factortype = MAT_FACTOR_LU; 3499 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3500 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3501 mumps->sym = 0; 3502 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3503 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3504 3505 B->ops->view = MatView_MUMPS; 3506 B->ops->getinfo = MatGetInfo_MUMPS; 3507 3508 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3509 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3510 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3511 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3512 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3513 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3514 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3515 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3516 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3517 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3518 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3519 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3520 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3521 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3522 3523 /* set solvertype */ 3524 PetscCall(PetscFree(B->solvertype)); 3525 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3526 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3527 if (size == 1) { 3528 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3529 B->canuseordering = PETSC_TRUE; 3530 } 3531 B->ops->destroy = MatDestroy_MUMPS; 3532 B->data = (void *)mumps; 3533 3534 *F = B; 3535 mumps->id.job = JOB_NULL; 3536 mumps->ICNTL_pre = NULL; 3537 mumps->CNTL_pre = NULL; 3538 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3539 PetscFunctionReturn(PETSC_SUCCESS); 3540 } 3541 3542 /* MatGetFactor for Seq and MPI SELL matrices */ 3543 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3544 { 3545 Mat B; 3546 Mat_MUMPS *mumps; 3547 PetscBool isSeqSELL; 3548 PetscMPIInt size; 3549 3550 PetscFunctionBegin; 3551 /* Create the factorization matrix */ 3552 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3553 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3554 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3555 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3556 PetscCall(MatSetUp(B)); 3557 3558 PetscCall(PetscNew(&mumps)); 3559 3560 B->ops->view = MatView_MUMPS; 3561 B->ops->getinfo = MatGetInfo_MUMPS; 3562 3563 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3564 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3565 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3566 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3567 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3568 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3569 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3570 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3571 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3572 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3573 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3574 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3575 3576 if (ftype == MAT_FACTOR_LU) { 3577 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3578 B->factortype = MAT_FACTOR_LU; 3579 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3580 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3581 mumps->sym = 0; 3582 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3583 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3584 3585 /* set solvertype */ 3586 PetscCall(PetscFree(B->solvertype)); 3587 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3588 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3589 if (size == 1) { 3590 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3591 B->canuseordering = PETSC_TRUE; 3592 } 3593 B->ops->destroy = MatDestroy_MUMPS; 3594 B->data = (void *)mumps; 3595 3596 *F = B; 3597 mumps->id.job = JOB_NULL; 3598 mumps->ICNTL_pre = NULL; 3599 mumps->CNTL_pre = NULL; 3600 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3601 PetscFunctionReturn(PETSC_SUCCESS); 3602 } 3603 3604 /* MatGetFactor for MATNEST matrices */ 3605 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3606 { 3607 Mat B, **mats; 3608 Mat_MUMPS *mumps; 3609 PetscInt nr, nc; 3610 PetscMPIInt size; 3611 PetscBool flg = PETSC_TRUE; 3612 3613 PetscFunctionBegin; 3614 #if defined(PETSC_USE_COMPLEX) 3615 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3616 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3617 *F = NULL; 3618 PetscFunctionReturn(PETSC_SUCCESS); 3619 } 3620 #endif 3621 3622 /* Return if some condition is not satisfied */ 3623 *F = NULL; 3624 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3625 if (ftype == MAT_FACTOR_CHOLESKY) { 3626 IS *rows, *cols; 3627 PetscInt *m, *M; 3628 3629 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3630 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3631 PetscCall(MatNestGetISs(A, rows, cols)); 3632 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3633 if (!flg) { 3634 PetscCall(PetscFree2(rows, cols)); 3635 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 3636 PetscFunctionReturn(PETSC_SUCCESS); 3637 } 3638 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3639 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3640 for (PetscInt r = 0; flg && r < nr; r++) 3641 for (PetscInt k = r + 1; flg && k < nr; k++) 3642 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3643 PetscCall(PetscFree2(m, M)); 3644 PetscCall(PetscFree2(rows, cols)); 3645 if (!flg) { 3646 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. 
Use MAT_FACTOR_LU.\n")); 3647 PetscFunctionReturn(PETSC_SUCCESS); 3648 } 3649 } 3650 3651 for (PetscInt r = 0; r < nr; r++) { 3652 for (PetscInt c = 0; c < nc; c++) { 3653 Mat sub = mats[r][c]; 3654 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans; 3655 3656 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3657 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 3658 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 3659 else { 3660 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 3661 if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 3662 } 3663 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3664 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3665 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3666 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3667 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3668 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3669 if (ftype == MAT_FACTOR_CHOLESKY) { 3670 if (r == c) { 3671 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ) { 3672 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3673 flg = PETSC_FALSE; 3674 } 3675 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3676 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3677 flg = PETSC_FALSE; 3678 } 3679 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3680 PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3681 flg = PETSC_FALSE; 3682 } 3683 } 3684 } 3685 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3686 3687 /* Create the factorization matrix */ 3688 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3689 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3690 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3691 PetscCall(MatSetUp(B)); 3692 3693 PetscCall(PetscNew(&mumps)); 3694 3695 B->ops->view = MatView_MUMPS; 3696 B->ops->getinfo = MatGetInfo_MUMPS; 3697 3698 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3699 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3700 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3701 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3702 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3703 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3704 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3705 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3706 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3707 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", 
MatMumpsGetRinfo_MUMPS)); 3708 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3709 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3710 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3711 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3712 3713 if (ftype == MAT_FACTOR_LU) { 3714 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3715 B->factortype = MAT_FACTOR_LU; 3716 mumps->sym = 0; 3717 } else { 3718 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3719 B->factortype = MAT_FACTOR_CHOLESKY; 3720 #if defined(PETSC_USE_COMPLEX) 3721 mumps->sym = 2; 3722 #else 3723 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3724 else mumps->sym = 2; 3725 #endif 3726 } 3727 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3728 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3729 3730 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3731 if (size == 1) { 3732 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3733 B->canuseordering = PETSC_TRUE; 3734 } 3735 3736 /* set solvertype */ 3737 PetscCall(PetscFree(B->solvertype)); 3738 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3739 B->ops->destroy = MatDestroy_MUMPS; 3740 B->data = (void *)mumps; 3741 3742 *F = B; 3743 mumps->id.job = JOB_NULL; 3744 mumps->ICNTL_pre = NULL; 3745 mumps->CNTL_pre = NULL; 3746 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3747 PetscFunctionReturn(PETSC_SUCCESS); 3748 } 3749 3750 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3751 { 3752 PetscFunctionBegin; 3753 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3754 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3755 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3756 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3757 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3758 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3759 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3760 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3761 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3762 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3763 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3764 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3765 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 3766 PetscFunctionReturn(PETSC_SUCCESS); 3767 } 3768