/*
   Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   numbers of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
#error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
#error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;

  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
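
/* Illustrative usage sketch (comment only, not compiled): the two helpers above are used throughout this
   file roughly as follows. The option name is just one member of the -mat_mumps_icntl_* family handled later.

     PetscMUMPSInt n;
     PetscCall(PetscMUMPSIntCast(A->rmap->N, &n));   // errors in debugging builds if the value does not fit in 32 bits

     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows", "None", mumps->id.ICNTL(24), &icntl, &flg));
*/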

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
      } \
      PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
      /* Global info is the same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code, and leave the others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
      */ \
      PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
    } else { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { \
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
    PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
    PetscCall(PetscFPTrapPop()); \
  } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]
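
/* With the macros above, option settings in this file read like the MUMPS manual; for example
     mumps->id.ICNTL(7) = 2;
   expands to mumps->id.icntl[6] = 2, i.e. the 1-based entry ICNTL(7) of the control array. */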

typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;          /* the (i,j,v) triplets passed to mumps */
  PetscScalar   *val, *val_alloc;    /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info;       /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscInt64  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller; blocked processes release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is the petsc matrix's comm */
  PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master;        /* is this rank the master of omp_comm? */
  MPI_Request *reqs;
};
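
/* Descriptive note (not exhaustive): the irn/jcn/val triplet above is filled by the MatConvertToTriples_*
   routines below and later handed to MUMPS either as centralized arrays (id.irn/id.jcn/id.a) or as
   distributed arrays (id.irn_loc/id.jcn_loc/id.a_loc), depending on how the factorization is set up.
   As a tiny example, with Fortran-style indexing (shift = 1) the 2x3 matrix

       [ 2  0  3 ]              irn = 1 1 2 2
       [ 0  5  7 ]   becomes    jcn = 1 3 2 3
                                val = 2 3 5 7
*/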

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A     - matrix in aij, baij or sbaij format
    shift - 0: C style output triple; 1: Fortran style output triple.
    reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
            MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
 */

PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  mumps->val = (PetscScalar *)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscInt64     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = a->sliidx[a->totalslices];
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  M          = A->rmap->N / bs;
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai = aa->i;
    aj = aa->j;
    nz = bs2 * aa->nz;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscInt64      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool isset, hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscInt64 alloc_size = aa->nz * bs2;

    PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
    if (bs > 1) {
      PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs > 1) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (ajj[j] > i || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
              }
              val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j;
  const PetscScalar *av, *v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
  PetscBool          missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i + 1] - adiag[i];
        }
      }
    } else {
      for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) val[nz++] = v1[j];
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) val[nz++] = v1[j];
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
  PetscInt           bs;
  PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A, &bs));
  rstart = A->rmap->rstart;
  ai     = aa->i;
  aj     = aa->j;
  bi     = bb->i;
  bj     = bb->j;
  av     = aa->a;
  bv     = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    /* cannot decide the exact mumps->nnz now because of the SBAIJ */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i] * bs2;
    v2     = bv + bi[i] * bs2;

    if (bs > 1) {
      /* A-part */
      for (j = 0; j < countA; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (rstart + ajj[j] * bs > irow || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs];
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
            }
            val[jj++] = v2[j * bs2 + m + k * bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j = 0; j < countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscInt64         rstart, cstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)(Ad)->data;
  bb = (Mat_SeqAIJ *)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;
  cstart = A->cmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
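
/* Note on the A-part/B-part pattern used by the MPI conversion routines: in a PETSc MPI matrix the local
   diagonal block ("A-part") stores column indices relative to the owned column block, so cstart (or rstart
   for the symmetric formats) is added to recover global columns, while the off-diagonal block ("B-part")
   stores compressed local indices that are translated to global columns through garray[]. */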

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)(Ad)->data;
  bb = (Mat_SeqAIJ *)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i = 0; i < m; i++) {
      nza += (ai[i + 1] - adiag[i]);
      countB = bi[i + 1] - bi[i];
      bjj    = bj + bi[i];
      for (j = 0; j < countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of upper triangular part of mat */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i + 1] - adiag[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscInt64     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE;
          MatInfo   info;

          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
          if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
          else {
            PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
            if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
          }
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));

          if (chol) {
            if (r == c) {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
            } else {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub  = mats[r][c];
        const PetscInt *ridx = rows_idx[r];
        const PetscInt *cidx = cols_idx[c];
        PetscInt        rst;
        PetscSF         csf;
        PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
        PetscLayout     cmap;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        swap = (PetscBool)(isTrans || isHTrans);

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        if (isHTrans) { /* conjugate the entries */
          for (PetscInt k = 0; k < mumps->nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
        }
        PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool isTrans, isHTrans = PETSC_FALSE;
        Mat       sub  = mats[r][c];
        PetscInt  midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (isHTrans) {
          PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
        }
      }
    }
    mumps->val = oval;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
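
/* Background for the routine below: with ICNTL(20) = 10 MUMPS accepts a dense right-hand side distributed over
   the ranks; each rank provides nloc_rhs/lrhs_loc, irhs_loc (1-based global row indices of its local rows) and
   rhs_loc. When the PETSc OpenMP support is active, only the master rank of omp_comm calls MUMPS, so the RHS
   pieces of the other ranks in omp_comm are first gathered onto that master rank. */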

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar *)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
                                   "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
                                   &cite2));

  if (A->factorerrortype) {
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscCall(VecSetInf(x));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b, &rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq, &array));
        mumps->id.rhs = (MumpsScalar *)array;
      }
    }
  } else { /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    PetscCall(VecCopy(b, x));
    PetscCall(VecGetArray(x, &array));
    mumps->id.rhs = (MumpsScalar *)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    second_solve = PETSC_TRUE;
    PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }
  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), so recreate scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
  }

  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      PetscCall(VecRestoreArrayRead(b, &rarray));
    } else if (!mumps->myid) {
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else PetscCall(VecRestoreArray(x, &array));

  PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatSolve_MUMPS(A, b, x));
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL;
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save;
  PetscScalar       *bray, *sol_loc, *sol_loc_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc;
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
See MatMatTransposeSolve_MUMPS() */ 1495 PetscCall(MatTransposeGetMat(B, &Bt)); 1496 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1497 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1498 } 1499 1500 PetscCall(MatGetSize(B, &M, &nrhs)); 1501 mumps->id.nrhs = nrhs; 1502 mumps->id.lrhs = M; 1503 mumps->id.rhs = NULL; 1504 1505 if (mumps->petsc_size == 1) { 1506 PetscScalar *aa; 1507 PetscInt spnr, *ia, *ja; 1508 PetscBool second_solve = PETSC_FALSE; 1509 1510 PetscCall(MatDenseGetArray(X, &array)); 1511 mumps->id.rhs = (MumpsScalar *)array; 1512 1513 if (denseB) { 1514 /* copy B to X */ 1515 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1516 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1517 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1518 } else { /* sparse B */ 1519 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1520 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1521 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1522 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1523 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1524 } 1525 /* handle condensation step of Schur complement (if any) */ 1526 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1527 second_solve = PETSC_TRUE; 1528 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1529 } 1530 /* solve phase */ 1531 mumps->id.job = JOB_SOLVE; 1532 PetscMUMPS_c(mumps); 1533 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1534 1535 /* handle expansion step of Schur complement (if any) */ 1536 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1537 if (!denseB) { /* sparse B */ 1538 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1539 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1540 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1541 } 1542 PetscCall(MatDenseRestoreArray(X, &array)); 1543 PetscFunctionReturn(PETSC_SUCCESS); 1544 } 1545 1546 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
*/ 1547 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1548 1549 /* create msol_loc to hold mumps local solution */ 1550 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1551 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1552 1553 lsol_loc = mumps->id.lsol_loc; 1554 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1555 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1556 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1557 mumps->id.isol_loc = isol_loc; 1558 1559 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1560 1561 if (denseB) { 1562 if (mumps->ICNTL20 == 10) { 1563 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1564 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1565 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1566 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1567 PetscCall(MatGetLocalSize(B, &m, NULL)); 1568 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1569 } else { 1570 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1571 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1572 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1573 0, re-arrange B into desired order, which is a local operation. 1574 */ 1575 1576 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1577 /* wrap dense rhs matrix B into a vector v_mpi */ 1578 PetscCall(MatGetLocalSize(B, &m, NULL)); 1579 PetscCall(MatDenseGetArray(B, &bray)); 1580 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1581 PetscCall(MatDenseRestoreArray(B, &bray)); 1582 1583 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1584 if (!mumps->myid) { 1585 PetscInt *idx; 1586 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1587 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1588 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1589 k = 0; 1590 for (proc = 0; proc < mumps->petsc_size; proc++) { 1591 for (j = 0; j < nrhs; j++) { 1592 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1593 } 1594 } 1595 1596 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1597 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1598 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1599 } else { 1600 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1601 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1602 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1603 } 1604 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1605 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1606 PetscCall(ISDestroy(&is_to)); 1607 PetscCall(ISDestroy(&is_from)); 1608 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1609 1610 if (!mumps->myid) { /* define rhs on the host */ 1611 PetscCall(VecGetArray(b_seq, &bray)); 1612 mumps->id.rhs = (MumpsScalar *)bray; 1613 PetscCall(VecRestoreArray(b_seq, &bray)); 1614 } 1615 } 1616 } else { /* sparse B */ 1617 b = (Mat_MPIAIJ *)Bt->data; 1618 1619 /* wrap dense X into a vector v_mpi */ 1620 PetscCall(MatGetLocalSize(X, &m, NULL)); 1621 PetscCall(MatDenseGetArray(X, &bray)); 1622 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1623 PetscCall(MatDenseRestoreArray(X, &bray)); 1624 1625 if (!mumps->myid) { 1626 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1627 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1628 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1629 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1630 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1631 } else { 1632 mumps->id.irhs_ptr = NULL; 1633 mumps->id.irhs_sparse = NULL; 1634 mumps->id.nz_rhs = 0; 1635 mumps->id.rhs_sparse = NULL; 1636 } 1637 } 1638 1639 /* solve phase */ 1640 mumps->id.job = JOB_SOLVE; 1641 PetscMUMPS_c(mumps); 1642 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1643 1644 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1645 PetscCall(MatDenseGetArray(X, &array)); 1646 PetscCall(VecPlaceArray(v_mpi, array)); 1647 1648 /* create scatter scat_sol */ 1649 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1650 /* iidx: index for scatter mumps solution to petsc X */ 1651 1652 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1653 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1654 for (i = 0; i < lsol_loc; i++) { 1655 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1656 1657 for (proc = 0; proc < mumps->petsc_size; proc++) { 1658 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1659 myrstart = rstart[proc]; 1660 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1661 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1662 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1663 break; 1664 } 1665 } 1666 1667 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1668 } 1669 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1670 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1671 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1672 PetscCall(ISDestroy(&is_from)); 1673 PetscCall(ISDestroy(&is_to)); 1674 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1675 PetscCall(MatDenseRestoreArray(X, &array)); 1676 1677 /* free spaces */ 1678 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1679 mumps->id.isol_loc = isol_loc_save; 1680 1681 PetscCall(PetscFree2(sol_loc, isol_loc)); 1682 PetscCall(PetscFree(idxx)); 1683 PetscCall(VecDestroy(&msol_loc)); 1684 PetscCall(VecDestroy(&v_mpi)); 1685 if (!denseB) { 1686 if (!mumps->myid) { 1687 b = (Mat_MPIAIJ *)Bt->data; 1688 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1689 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1690 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1691 } 1692 } else { 1693 if (mumps->ICNTL20 == 0) { 1694 PetscCall(VecDestroy(&b_seq)); 1695 PetscCall(VecScatterDestroy(&scat_rhs)); 1696 } 1697 } 1698 PetscCall(VecScatterDestroy(&scat_sol)); 1699 PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3))); 1700 PetscFunctionReturn(PETSC_SUCCESS); 1701 } 1702 1703 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1704 { 1705 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1706 PetscMUMPSInt oldvalue = mumps->id.ICNTL(9); 1707 1708 PetscFunctionBegin; 1709 mumps->id.ICNTL(9) = 0; 1710 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1711 mumps->id.ICNTL(9) = oldvalue; 1712 PetscFunctionReturn(PETSC_SUCCESS); 1713 } 1714 1715 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1716 { 1717 PetscBool flg; 1718 Mat B; 1719 1720 PetscFunctionBegin; 1721 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1722 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1723 1724 /* Create B=Bt^T that uses Bt's data structure */ 1725 PetscCall(MatCreateTranspose(Bt, &B)); 1726 1727 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1728 PetscCall(MatDestroy(&B)); 1729 PetscFunctionReturn(PETSC_SUCCESS); 1730 } 1731 1732 #if !defined(PETSC_USE_COMPLEX) 1733 /* 1734 input: 1735 F: numeric factor 1736 output: 1737 nneg: total number of negative pivots 1738 nzero: total number of zero pivots 1739 npos: (global dimension of F) - nneg - nzero 1740 */ 1741 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1742 { 1743 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1744 PetscMPIInt size; 1745 1746 PetscFunctionBegin; 1747 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1748 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility 
to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1749 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13)); 1750 1751 if (nneg) *nneg = mumps->id.INFOG(12); 1752 if (nzero || npos) { 1753 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1754 if (nzero) *nzero = mumps->id.INFOG(28); 1755 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1756 } 1757 PetscFunctionReturn(PETSC_SUCCESS); 1758 } 1759 #endif 1760 1761 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1762 { 1763 PetscInt i, nreqs; 1764 PetscMUMPSInt *irn, *jcn; 1765 PetscMPIInt count; 1766 PetscInt64 totnnz, remain; 1767 const PetscInt osize = mumps->omp_comm_size; 1768 PetscScalar *val; 1769 1770 PetscFunctionBegin; 1771 if (osize > 1) { 1772 if (reuse == MAT_INITIAL_MATRIX) { 1773 /* master first gathers counts of nonzeros to receive */ 1774 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1775 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1776 1777 /* Then each computes number of send/recvs */ 1778 if (mumps->is_omp_master) { 1779 /* Start from 1 since self communication is not done in MPI */ 1780 nreqs = 0; 1781 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1782 } else { 1783 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1784 } 1785 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1786 1787 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1788 MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz 1789 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1790 is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1791 */ 1792 nreqs = 0; /* counter for actual send/recvs */ 1793 if (mumps->is_omp_master) { 1794 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1795 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1796 PetscCall(PetscMalloc1(totnnz, &val)); 1797 1798 /* Self communication */ 1799 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1800 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1801 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1802 1803 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1804 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1805 PetscCall(PetscFree(mumps->val_alloc)); 1806 mumps->nnz = totnnz; 1807 mumps->irn = irn; 1808 mumps->jcn = jcn; 1809 mumps->val = mumps->val_alloc = val; 1810 1811 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1812 jcn += mumps->recvcount[0]; 1813 val += mumps->recvcount[0]; 1814 1815 /* Remote communication */ 1816 for (i = 1; i < osize; i++) { 1817 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1818 remain = mumps->recvcount[i] - count; 1819 while (count > 0) { 1820 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1821 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1822 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1823 irn += count; 1824 jcn += count; 1825 val += count; 1826 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1827 remain -= count; 1828 } 1829 } 1830 } else { 1831 irn = mumps->irn; 1832 jcn = mumps->jcn; 1833 val = mumps->val; 1834 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1835 remain = mumps->nnz - count; 1836 while (count > 0) { 1837 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1838 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1839 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1840 irn += count; 1841 jcn += count; 1842 val += count; 1843 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1844 remain -= count; 1845 } 1846 } 1847 } else { 1848 nreqs = 0; 1849 if (mumps->is_omp_master) { 1850 val = mumps->val + mumps->recvcount[0]; 1851 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1852 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1853 remain = mumps->recvcount[i] - count; 1854 while (count > 0) { 1855 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1856 val += count; 1857 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1858 remain -= count; 1859 } 1860 } 1861 } else { 1862 val = mumps->val; 1863 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1864 remain = mumps->nnz - count; 1865 while (count > 0) { 1866 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1867 val += count; 1868 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1869 remain -= count; 1870 } 1871 } 1872 } 1873 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1874 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1875 } 1876 PetscFunctionReturn(PETSC_SUCCESS); 1877 } 1878 1879 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const 
MatFactorInfo *info) 1880 { 1881 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1882 PetscBool isMPIAIJ; 1883 1884 PetscFunctionBegin; 1885 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1)=-16 with INFO(2)=0 means an empty matrix, not an error */ 1886 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1887 PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1888 PetscFunctionReturn(PETSC_SUCCESS); 1889 } 1890 1891 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1892 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1893 1894 /* numerical factorization phase */ 1895 mumps->id.job = JOB_FACTNUMERIC; 1896 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1897 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 1898 } else { 1899 mumps->id.a_loc = (MumpsScalar *)mumps->val; 1900 } 1901 PetscMUMPS_c(mumps); 1902 if (mumps->id.INFOG(1) < 0) { 1903 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 1904 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1905 PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1906 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1907 } else if (mumps->id.INFOG(1) == -13) { 1908 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1909 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1910 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1911 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1912 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1913 } else { 1914 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1915 F->factorerrortype = MAT_FACTOR_OTHER; 1916 } 1917 } 1918 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " mumps->id.ICNTL(16):=%d", mumps->id.ICNTL(16)); 1919 1920 F->assembled = PETSC_TRUE; 1921 1922 if (F->schur) { /* reset Schur status to unfactored */ 1923 #if defined(PETSC_HAVE_CUDA) 1924 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1925 #endif 1926 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1927 mumps->id.ICNTL(19) = 2; 1928 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1929 } 1930 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1931 } 1932 1933 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1934 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1935 1936 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1937 if (mumps->petsc_size > 1) { 1938 PetscInt lsol_loc; 1939 PetscScalar *sol_loc; 1940 1941 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 1942 1943 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1944 if (mumps->x_seq) { 1945
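        /* INFO(23), the length of the MUMPS local solution, may change between numerical factorizations, so the scat_sol, sol_loc/isol_loc buffers and x_seq created by a previous factorization are released here and reallocated below with the new length. */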
PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1946 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1947 PetscCall(VecDestroy(&mumps->x_seq)); 1948 } 1949 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1950 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1951 mumps->id.lsol_loc = lsol_loc; 1952 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1953 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1954 } 1955 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1956 PetscFunctionReturn(PETSC_SUCCESS); 1957 } 1958 1959 /* Sets MUMPS options from the options database */ 1960 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1961 { 1962 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1963 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1964 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1965 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1966 MumpsScalar *arr; 1967 1968 PetscFunctionBegin; 1969 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1970 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1971 PetscInt nthreads = 0; 1972 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1973 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1974 1975 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1976 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1977 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1978 1979 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1980 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1981 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1982 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1983 if (mumps->use_petsc_omp_support) { 1984 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 1985 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1986 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1987 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1988 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1989 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1990 #endif 1991 } else { 1992 mumps->omp_comm = PETSC_COMM_SELF; 1993 mumps->mumps_comm = mumps->petsc_comm; 1994 mumps->is_omp_master = PETSC_TRUE; 1995 } 1996 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1997 mumps->reqs = NULL; 1998 mumps->tag = 0; 1999 2000 if (mumps->mumps_comm != MPI_COMM_NULL) { 2001 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2002 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 2003 MPI_Comm comm; 2004 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2005 mumps->mumps_comm = comm; 2006 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2007 } 2008 2009 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2010 mumps->id.job = JOB_INIT; 2011 mumps->id.par = 1; /* host participates factorizaton and solve */ 2012 mumps->id.sym = mumps->sym; 2013 2014 size = mumps->id.size_schur; 2015 arr = mumps->id.schur; 2016 listvar_schur = mumps->id.listvar_schur; 2017 PetscMUMPS_c(mumps); 2018 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 2019 /* restore cached ICNTL and CNTL values */ 2020 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2021 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2022 PetscCall(PetscFree(mumps->ICNTL_pre)); 2023 PetscCall(PetscFree(mumps->CNTL_pre)); 2024 2025 if (schur) { 2026 mumps->id.size_schur = size; 2027 mumps->id.schur_lld = size; 2028 mumps->id.schur = arr; 2029 mumps->id.listvar_schur = listvar_schur; 2030 if (mumps->petsc_size > 1) { 2031 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2032 2033 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2034 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2035 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2036 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2037 } else { 2038 if (F->factortype == MAT_FACTOR_LU) { 2039 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2040 } else { 2041 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2042 } 2043 } 2044 mumps->id.ICNTL(26) = -1; 2045 } 2046 2047 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2048 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
2049 */ 2050 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2051 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2052 2053 mumps->scat_rhs = NULL; 2054 mumps->scat_sol = NULL; 2055 2056 /* set PETSc-MUMPS default options - override MUMPS default */ 2057 mumps->id.ICNTL(3) = 0; 2058 mumps->id.ICNTL(4) = 0; 2059 if (mumps->petsc_size == 1) { 2060 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2061 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2062 } else { 2063 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2064 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2065 } 2066 } 2067 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2068 if (flg) mumps->id.ICNTL(1) = icntl; 2069 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2070 if (flg) mumps->id.ICNTL(2) = icntl; 2071 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2072 if (flg) mumps->id.ICNTL(3) = icntl; 2073 2074 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2075 if (flg) mumps->id.ICNTL(4) = icntl; 2076 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2077 2078 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2079 if (flg) mumps->id.ICNTL(6) = icntl; 2080 2081 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2082 if (flg) { 2083 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2084 mumps->id.ICNTL(7) = icntl; 2085 } 2086 2087 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2088 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2089 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2090 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2091 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2092 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2093 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2094 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2095 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2096 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2097 if (flg) { 2098 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2099 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2100 } 2101 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2102 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2103 PetscCall(MatDestroy(&F->schur)); 2104 PetscCall(MatMumpsResetSchur_Private(mumps)); 2105 } 2106 2107 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2108 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2109 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2110 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2111 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2112 In short, we could not use distributed RHS with MPICH until v4.0b1. 
2113 */ 2114 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2115 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 2116 #else 2117 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 2118 #endif 2119 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2120 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2121 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2122 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2123 #endif 2124 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2125 2126 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2127 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2128 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2129 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2130 2131 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2132 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2133 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2134 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2135 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2136 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2137 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2138 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2139 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", 
mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2140 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2141 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2142 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2143 2144 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2145 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2146 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2147 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2148 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2149 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2150 2151 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2152 2153 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2154 if (ninfo) { 2155 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo); 2156 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2157 mumps->ninfo = ninfo; 2158 for (i = 0; i < ninfo; i++) { 2159 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]); 2160 mumps->info[i] = info[i]; 2161 } 2162 } 2163 PetscOptionsEnd(); 2164 PetscFunctionReturn(PETSC_SUCCESS); 2165 } 2166 2167 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2168 { 2169 PetscFunctionBegin; 2170 if (mumps->id.INFOG(1) < 0) { 2171 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1)); 2172 if (mumps->id.INFOG(1) == -6) { 2173 PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2174 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2175 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2176 PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2177 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2178 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFOG(1)=-16 with INFO(2)=0 indicates an empty matrix */ 2179 PetscCall(PetscInfo(F, "Empty matrix\n")); 2180 } else { 2181 PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2182 F->factorerrortype = MAT_FACTOR_OTHER; 2183 } 2184 } 2185 PetscFunctionReturn(PETSC_SUCCESS); 2186 } 2187 2188 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS
r, IS c, const MatFactorInfo *info) 2189 { 2190 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2191 Vec b; 2192 const PetscInt M = A->rmap->N; 2193 2194 PetscFunctionBegin; 2195 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2196 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2197 PetscFunctionReturn(PETSC_SUCCESS); 2198 } 2199 2200 /* Set MUMPS options from the options database */ 2201 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2202 2203 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2204 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2205 2206 /* analysis phase */ 2207 mumps->id.job = JOB_FACTSYMBOLIC; 2208 mumps->id.n = M; 2209 switch (mumps->id.ICNTL(18)) { 2210 case 0: /* centralized assembled matrix input */ 2211 if (!mumps->myid) { 2212 mumps->id.nnz = mumps->nnz; 2213 mumps->id.irn = mumps->irn; 2214 mumps->id.jcn = mumps->jcn; 2215 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2216 if (r) { 2217 mumps->id.ICNTL(7) = 1; 2218 if (!mumps->myid) { 2219 const PetscInt *idx; 2220 PetscInt i; 2221 2222 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2223 PetscCall(ISGetIndices(r, &idx)); 2224 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2225 PetscCall(ISRestoreIndices(r, &idx)); 2226 } 2227 } 2228 } 2229 break; 2230 case 3: /* distributed assembled matrix input (size>1) */ 2231 mumps->id.nnz_loc = mumps->nnz; 2232 mumps->id.irn_loc = mumps->irn; 2233 mumps->id.jcn_loc = mumps->jcn; 2234 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2235 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2236 PetscCall(MatCreateVecs(A, NULL, &b)); 2237 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2238 PetscCall(VecDestroy(&b)); 2239 } 2240 break; 2241 } 2242 PetscMUMPS_c(mumps); 2243 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2244 2245 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2246 F->ops->solve = MatSolve_MUMPS; 2247 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2248 F->ops->matsolve = MatMatSolve_MUMPS; 2249 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2250 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2251 2252 mumps->matstruc = SAME_NONZERO_PATTERN; 2253 PetscFunctionReturn(PETSC_SUCCESS); 2254 } 2255 2256 /* Note the Petsc r and c permutations are ignored */ 2257 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2258 { 2259 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2260 Vec b; 2261 const PetscInt M = A->rmap->N; 2262 2263 PetscFunctionBegin; 2264 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2265 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2266 PetscFunctionReturn(PETSC_SUCCESS); 2267 } 2268 2269 /* Set MUMPS options from the options database */ 2270 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2271 2272 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2273 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2274 2275 /* analysis phase */ 2276 mumps->id.job = JOB_FACTSYMBOLIC; 2277 mumps->id.n = M; 2278 switch (mumps->id.ICNTL(18)) { 2279 case 0: /* centralized assembled matrix input */ 2280 if (!mumps->myid) { 2281 mumps->id.nnz = mumps->nnz; 2282 mumps->id.irn = mumps->irn; 2283 mumps->id.jcn = mumps->jcn; 2284 if (mumps->id.ICNTL(6) > 1) mumps->id.a 
= (MumpsScalar *)mumps->val; 2285 } 2286 break; 2287 case 3: /* distributed assembled matrix input (size>1) */ 2288 mumps->id.nnz_loc = mumps->nnz; 2289 mumps->id.irn_loc = mumps->irn; 2290 mumps->id.jcn_loc = mumps->jcn; 2291 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2292 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2293 PetscCall(MatCreateVecs(A, NULL, &b)); 2294 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2295 PetscCall(VecDestroy(&b)); 2296 } 2297 break; 2298 } 2299 PetscMUMPS_c(mumps); 2300 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2301 2302 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2303 F->ops->solve = MatSolve_MUMPS; 2304 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2305 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2306 2307 mumps->matstruc = SAME_NONZERO_PATTERN; 2308 PetscFunctionReturn(PETSC_SUCCESS); 2309 } 2310 2311 /* Note the Petsc r permutation and factor info are ignored */ 2312 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2313 { 2314 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2315 Vec b; 2316 const PetscInt M = A->rmap->N; 2317 2318 PetscFunctionBegin; 2319 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2320 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2321 PetscFunctionReturn(PETSC_SUCCESS); 2322 } 2323 2324 /* Set MUMPS options from the options database */ 2325 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2326 2327 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2328 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2329 2330 /* analysis phase */ 2331 mumps->id.job = JOB_FACTSYMBOLIC; 2332 mumps->id.n = M; 2333 switch (mumps->id.ICNTL(18)) { 2334 case 0: /* centralized assembled matrix input */ 2335 if (!mumps->myid) { 2336 mumps->id.nnz = mumps->nnz; 2337 mumps->id.irn = mumps->irn; 2338 mumps->id.jcn = mumps->jcn; 2339 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2340 } 2341 break; 2342 case 3: /* distributed assembled matrix input (size>1) */ 2343 mumps->id.nnz_loc = mumps->nnz; 2344 mumps->id.irn_loc = mumps->irn; 2345 mumps->id.jcn_loc = mumps->jcn; 2346 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2347 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2348 PetscCall(MatCreateVecs(A, NULL, &b)); 2349 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2350 PetscCall(VecDestroy(&b)); 2351 } 2352 break; 2353 } 2354 PetscMUMPS_c(mumps); 2355 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2356 2357 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2358 F->ops->solve = MatSolve_MUMPS; 2359 F->ops->solvetranspose = MatSolve_MUMPS; 2360 F->ops->matsolve = MatMatSolve_MUMPS; 2361 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2362 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2363 #if defined(PETSC_USE_COMPLEX) 2364 F->ops->getinertia = NULL; 2365 #else 2366 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2367 #endif 2368 2369 mumps->matstruc = SAME_NONZERO_PATTERN; 2370 PetscFunctionReturn(PETSC_SUCCESS); 2371 } 2372 2373 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2374 { 2375 PetscBool iascii; 2376 PetscViewerFormat format; 2377 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2378 2379 PetscFunctionBegin; 2380 /* check if matrix is mumps type */ 2381 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2382 2383 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2384 if (iascii) { 2385 PetscCall(PetscViewerGetFormat(viewer, &format)); 2386 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2387 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2388 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2389 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2390 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2391 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2392 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2393 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2394 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2395 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2396 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2397 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2398 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2399 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2400 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2401 if (mumps->id.ICNTL(11) > 0) { 2402 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2403 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2404 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2405 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2406 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2407 PetscCall(PetscViewerASCIIPrintf(viewer, " 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2408 } 2409 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2410 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2411 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2412 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2413 /* ICNTL(15-17) not used */ 2414 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2415 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2416 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2417 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2418 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2419 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2420 2421 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2422 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2423 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2424 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2425 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2426 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2427 2428 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2429 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2430 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2431 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2432 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2433 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2434 2435 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2436 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2437 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2438 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2439 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2440 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2441 2442 /* information local to 
each processor */ 2443 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2444 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2445 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2446 PetscCall(PetscViewerFlush(viewer)); 2447 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2448 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2449 PetscCall(PetscViewerFlush(viewer)); 2450 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2451 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2452 PetscCall(PetscViewerFlush(viewer)); 2453 2454 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2455 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2456 PetscCall(PetscViewerFlush(viewer)); 2457 2458 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2459 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2460 PetscCall(PetscViewerFlush(viewer)); 2461 2462 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2463 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2464 PetscCall(PetscViewerFlush(viewer)); 2465 2466 if (mumps->ninfo && mumps->ninfo <= 80) { 2467 PetscInt i; 2468 for (i = 0; i < mumps->ninfo; i++) { 2469 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2470 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2471 PetscCall(PetscViewerFlush(viewer)); 2472 } 2473 } 2474 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2475 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2476 2477 if (mumps->myid == 0) { /* information from the host */ 2478 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2479 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2480 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2481 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2482 2483 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2484 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2485 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2486 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2487 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2488 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2489 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2490 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2491 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2492 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2493 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2494 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2495 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2496 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2497 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2498 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2499 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2500 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 
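        /* The same global statistics can be queried programmatically from a MUMPS factor matrix F obtained with MatGetFactor(); a minimal sketch of user code (not part of this file):
             PetscInt  nzfactors;
             PetscReal flops;
             PetscCall(MatMumpsGetInfog(F, 20, &nzfactors)); // estimated number of entries in the factors
             PetscCall(MatMumpsGetRinfog(F, 3, &flops));     // global estimated flops for the elimination after factorization
        */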
2501 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2502 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2503 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2504 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2505 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2506 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2507 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2508 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2509 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2510 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2511 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2512 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2513 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2514 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2515 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2516 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2517 } 2518 } 2519 } 2520 PetscFunctionReturn(PETSC_SUCCESS); 2521 } 2522 2523 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2524 { 2525 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2526 2527 PetscFunctionBegin; 2528 info->block_size = 1.0; 2529 info->nz_allocated = mumps->id.INFOG(20); 2530 info->nz_used = mumps->id.INFOG(20); 2531 info->nz_unneeded = 0.0; 2532 info->assemblies = 0.0; 2533 info->mallocs = 0.0; 2534 info->memory = 0.0; 2535 info->fill_ratio_given = 0; 2536 info->fill_ratio_needed = 0; 2537 info->factor_mallocs = 0; 2538 PetscFunctionReturn(PETSC_SUCCESS); 2539 } 2540 2541 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2542 { 2543 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2544 const PetscScalar *arr; 2545 const PetscInt *idxs; 2546 
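  /* Record the Schur indices (converted to 1-based for MUMPS) and attach a SEQDENSE matrix F->schur
     whose array MUMPS fills with the Schur complement; ICNTL(26) = -1 is a PETSc-side marker used to
     drive the Schur handling in the solve phase (see below) */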
PetscInt size, i; 2547 2548 PetscFunctionBegin; 2549 PetscCall(ISGetLocalSize(is, &size)); 2550 /* Schur complement matrix */ 2551 PetscCall(MatDestroy(&F->schur)); 2552 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2553 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2554 mumps->id.schur = (MumpsScalar *)arr; 2555 mumps->id.size_schur = size; 2556 mumps->id.schur_lld = size; 2557 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2558 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2559 2560 /* MUMPS expects Fortran style indices */ 2561 PetscCall(PetscFree(mumps->id.listvar_schur)); 2562 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2563 PetscCall(ISGetIndices(is, &idxs)); 2564 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2565 PetscCall(ISRestoreIndices(is, &idxs)); 2566 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2567 mumps->id.ICNTL(26) = -1; 2568 PetscFunctionReturn(PETSC_SUCCESS); 2569 } 2570 2571 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2572 { 2573 Mat St; 2574 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2575 PetscScalar *array; 2576 #if defined(PETSC_USE_COMPLEX) 2577 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2578 #endif 2579 2580 PetscFunctionBegin; 2581 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2582 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2583 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2584 PetscCall(MatSetType(St, MATDENSE)); 2585 PetscCall(MatSetUp(St)); 2586 PetscCall(MatDenseGetArray(St, &array)); 2587 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2588 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2589 PetscInt i, j, N = mumps->id.size_schur; 2590 for (i = 0; i < N; i++) { 2591 for (j = 0; j < N; j++) { 2592 #if !defined(PETSC_USE_COMPLEX) 2593 PetscScalar val = mumps->id.schur[i * N + j]; 2594 #else 2595 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2596 #endif 2597 array[j * N + i] = val; 2598 } 2599 } 2600 } else { /* stored by columns */ 2601 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2602 } 2603 } else { /* either full or lower-triangular (not packed) */ 2604 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2605 PetscInt i, j, N = mumps->id.size_schur; 2606 for (i = 0; i < N; i++) { 2607 for (j = i; j < N; j++) { 2608 #if !defined(PETSC_USE_COMPLEX) 2609 PetscScalar val = mumps->id.schur[i * N + j]; 2610 #else 2611 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2612 #endif 2613 array[i * N + j] = val; 2614 array[j * N + i] = val; 2615 } 2616 } 2617 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2618 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2619 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2620 PetscInt i, j, N = mumps->id.size_schur; 2621 for (i = 0; i < N; i++) { 2622 for (j = 0; j < i + 1; j++) { 2623 #if !defined(PETSC_USE_COMPLEX) 2624 PetscScalar val = mumps->id.schur[i * N + j]; 2625 #else 2626 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2627 #endif 2628 array[i * N + j] = val; 
2629 array[j * N + i] = val; 2630 } 2631 } 2632 } 2633 } 2634 PetscCall(MatDenseRestoreArray(St, &array)); 2635 *S = St; 2636 PetscFunctionReturn(PETSC_SUCCESS); 2637 } 2638 2639 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2640 { 2641 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2642 2643 PetscFunctionBegin; 2644 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2645 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2646 for (i = 0; i < nICNTL_pre; ++i) 2647 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2648 if (i == nICNTL_pre) { /* not already cached */ 2649 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2650 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2651 mumps->ICNTL_pre[0]++; 2652 } 2653 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2654 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2655 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2656 PetscFunctionReturn(PETSC_SUCCESS); 2657 } 2658 2659 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2660 { 2661 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2662 2663 PetscFunctionBegin; 2664 if (mumps->id.job == JOB_NULL) { 2665 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2666 *ival = 0; 2667 for (i = 0; i < nICNTL_pre; ++i) { 2668 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2669 } 2670 } else *ival = mumps->id.ICNTL(icntl); 2671 PetscFunctionReturn(PETSC_SUCCESS); 2672 } 2673 2674 /*@ 2675 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2676 2677 Logically Collective 2678 2679 Input Parameters: 2680 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2681 . icntl - index of MUMPS parameter array ICNTL() 2682 - ival - value of MUMPS ICNTL(icntl) 2683 2684 Options Database Key: 2685 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2686 2687 Level: beginner 2688 2689 References: 2690 . * - MUMPS Users' Guide 2691 2692 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2693 @*/ 2694 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2695 { 2696 PetscFunctionBegin; 2697 PetscValidType(F, 1); 2698 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2699 PetscValidLogicalCollectiveInt(F, icntl, 2); 2700 PetscValidLogicalCollectiveInt(F, ival, 3); 2701 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2702 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2703 PetscFunctionReturn(PETSC_SUCCESS); 2704 } 2705 2706 /*@ 2707 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2708 2709 Logically Collective 2710 2711 Input Parameters: 2712 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2713 - icntl - index of MUMPS parameter array ICNTL() 2714 2715 Output Parameter: 2716 . ival - value of MUMPS ICNTL(icntl) 2717 2718 Level: beginner 2719 2720 References: 2721 . 
* - MUMPS Users' Guide 2722 2723 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2724 @*/ 2725 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2726 { 2727 PetscFunctionBegin; 2728 PetscValidType(F, 1); 2729 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2730 PetscValidLogicalCollectiveInt(F, icntl, 2); 2731 PetscValidIntPointer(ival, 3); 2732 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2733 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2734 PetscFunctionReturn(PETSC_SUCCESS); 2735 } 2736 2737 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2738 { 2739 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2740 2741 PetscFunctionBegin; 2742 if (mumps->id.job == JOB_NULL) { 2743 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2744 for (i = 0; i < nCNTL_pre; ++i) 2745 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2746 if (i == nCNTL_pre) { 2747 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2748 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2749 mumps->CNTL_pre[0]++; 2750 } 2751 mumps->CNTL_pre[1 + 2 * i] = icntl; 2752 mumps->CNTL_pre[2 + 2 * i] = val; 2753 } else mumps->id.CNTL(icntl) = val; 2754 PetscFunctionReturn(PETSC_SUCCESS); 2755 } 2756 2757 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2758 { 2759 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2760 2761 PetscFunctionBegin; 2762 if (mumps->id.job == JOB_NULL) { 2763 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2764 *val = 0.0; 2765 for (i = 0; i < nCNTL_pre; ++i) { 2766 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2767 } 2768 } else *val = mumps->id.CNTL(icntl); 2769 PetscFunctionReturn(PETSC_SUCCESS); 2770 } 2771 2772 /*@ 2773 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2774 2775 Logically Collective 2776 2777 Input Parameters: 2778 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2779 . icntl - index of MUMPS parameter array CNTL() 2780 - val - value of MUMPS CNTL(icntl) 2781 2782 Options Database Key: 2783 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2784 2785 Level: beginner 2786 2787 References: 2788 . 
* - MUMPS Users' Guide 2789 2790 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2791 @*/ 2792 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2793 { 2794 PetscFunctionBegin; 2795 PetscValidType(F, 1); 2796 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2797 PetscValidLogicalCollectiveInt(F, icntl, 2); 2798 PetscValidLogicalCollectiveReal(F, val, 3); 2799 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2800 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2801 PetscFunctionReturn(PETSC_SUCCESS); 2802 } 2803 2804 /*@ 2805 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2806 2807 Logically Collective 2808 2809 Input Parameters: 2810 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2811 - icntl - index of MUMPS parameter array CNTL() 2812 2813 Output Parameter: 2814 . val - value of MUMPS CNTL(icntl) 2815 2816 Level: beginner 2817 2818 References: 2819 . * - MUMPS Users' Guide 2820 2821 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2822 @*/ 2823 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2824 { 2825 PetscFunctionBegin; 2826 PetscValidType(F, 1); 2827 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2828 PetscValidLogicalCollectiveInt(F, icntl, 2); 2829 PetscValidRealPointer(val, 3); 2830 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2831 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2832 PetscFunctionReturn(PETSC_SUCCESS); 2833 } 2834 2835 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2836 { 2837 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2838 2839 PetscFunctionBegin; 2840 *info = mumps->id.INFO(icntl); 2841 PetscFunctionReturn(PETSC_SUCCESS); 2842 } 2843 2844 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2845 { 2846 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2847 2848 PetscFunctionBegin; 2849 *infog = mumps->id.INFOG(icntl); 2850 PetscFunctionReturn(PETSC_SUCCESS); 2851 } 2852 2853 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2854 { 2855 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2856 2857 PetscFunctionBegin; 2858 *rinfo = mumps->id.RINFO(icntl); 2859 PetscFunctionReturn(PETSC_SUCCESS); 2860 } 2861 2862 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2863 { 2864 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2865 2866 PetscFunctionBegin; 2867 *rinfog = mumps->id.RINFOG(icntl); 2868 PetscFunctionReturn(PETSC_SUCCESS); 2869 } 2870 2871 PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2872 { 2873 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2874 2875 PetscFunctionBegin; 2876 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2877 *size = 0; 2878 *array = NULL; 2879 if 
(!mumps->myid) { 2880 *size = mumps->id.INFOG(28); 2881 PetscCall(PetscMalloc1(*size, array)); 2882 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2883 } 2884 PetscFunctionReturn(PETSC_SUCCESS); 2885 } 2886 2887 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2888 { 2889 Mat Bt = NULL, Btseq = NULL; 2890 PetscBool flg; 2891 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2892 PetscScalar *aa; 2893 PetscInt spnr, *ia, *ja, M, nrhs; 2894 2895 PetscFunctionBegin; 2896 PetscValidPointer(spRHS, 2); 2897 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2898 if (flg) { 2899 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2900 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2901 2902 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2903 2904 if (mumps->petsc_size > 1) { 2905 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2906 Btseq = b->A; 2907 } else { 2908 Btseq = Bt; 2909 } 2910 2911 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2912 mumps->id.nrhs = nrhs; 2913 mumps->id.lrhs = M; 2914 mumps->id.rhs = NULL; 2915 2916 if (!mumps->myid) { 2917 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2918 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2919 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2920 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2921 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2922 } else { 2923 mumps->id.irhs_ptr = NULL; 2924 mumps->id.irhs_sparse = NULL; 2925 mumps->id.nz_rhs = 0; 2926 mumps->id.rhs_sparse = NULL; 2927 } 2928 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2929 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2930 2931 /* solve phase */ 2932 mumps->id.job = JOB_SOLVE; 2933 PetscMUMPS_c(mumps); 2934 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 2935 2936 if (!mumps->myid) { 2937 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2938 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2939 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2940 } 2941 PetscFunctionReturn(PETSC_SUCCESS); 2942 } 2943 2944 /*@ 2945 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2946 2947 Logically Collective 2948 2949 Input Parameter: 2950 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2951 2952 Output Parameter: 2953 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2954 2955 Level: beginner 2956 2957 References: 2958 . 
* - MUMPS Users' Guide 2959 2960 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2961 @*/ 2962 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2963 { 2964 PetscFunctionBegin; 2965 PetscValidType(F, 1); 2966 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2967 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2968 PetscFunctionReturn(PETSC_SUCCESS); 2969 } 2970 2971 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2972 { 2973 Mat spRHS; 2974 2975 PetscFunctionBegin; 2976 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 2977 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 2978 PetscCall(MatDestroy(&spRHS)); 2979 PetscFunctionReturn(PETSC_SUCCESS); 2980 } 2981 2982 /*@ 2983 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 2984 2985 Logically Collective 2986 2987 Input Parameter: 2988 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2989 2990 Output Parameter: 2991 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 2992 2993 Level: beginner 2994 2995 References: 2996 . * - MUMPS Users' Guide 2997 2998 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2999 @*/ 3000 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3001 { 3002 PetscBool flg; 3003 3004 PetscFunctionBegin; 3005 PetscValidType(F, 1); 3006 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3007 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3008 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3009 3010 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3011 PetscFunctionReturn(PETSC_SUCCESS); 3012 } 3013 3014 /*@ 3015 MatMumpsGetInfo - Get MUMPS parameter INFO() 3016 3017 Logically Collective 3018 3019 Input Parameters: 3020 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3021 - icntl - index of MUMPS parameter array INFO() 3022 3023 Output Parameter: 3024 . ival - value of MUMPS INFO(icntl) 3025 3026 Level: beginner 3027 3028 References: 3029 . * - MUMPS Users' Guide 3030 3031 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3032 @*/ 3033 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3034 { 3035 PetscFunctionBegin; 3036 PetscValidType(F, 1); 3037 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3038 PetscValidIntPointer(ival, 3); 3039 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3040 PetscFunctionReturn(PETSC_SUCCESS); 3041 } 3042 3043 /*@ 3044 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3045 3046 Logically Collective 3047 3048 Input Parameters: 3049 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3050 - icntl - index of MUMPS parameter array INFOG() 3051 3052 Output Parameter: 3053 . ival - value of MUMPS INFOG(icntl) 3054 3055 Level: beginner 3056 3057 References: 3058 . 
* - MUMPS Users' Guide 3059 3060 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3061 @*/ 3062 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3063 { 3064 PetscFunctionBegin; 3065 PetscValidType(F, 1); 3066 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3067 PetscValidIntPointer(ival, 3); 3068 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3069 PetscFunctionReturn(PETSC_SUCCESS); 3070 } 3071 3072 /*@ 3073 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3074 3075 Logically Collective 3076 3077 Input Parameters: 3078 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3079 - icntl - index of MUMPS parameter array RINFO() 3080 3081 Output Parameter: 3082 . val - value of MUMPS RINFO(icntl) 3083 3084 Level: beginner 3085 3086 References: 3087 . * - MUMPS Users' Guide 3088 3089 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3090 @*/ 3091 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3092 { 3093 PetscFunctionBegin; 3094 PetscValidType(F, 1); 3095 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3096 PetscValidRealPointer(val, 3); 3097 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3098 PetscFunctionReturn(PETSC_SUCCESS); 3099 } 3100 3101 /*@ 3102 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3103 3104 Logically Collective 3105 3106 Input Parameters: 3107 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3108 - icntl - index of MUMPS parameter array RINFOG() 3109 3110 Output Parameter: 3111 . val - value of MUMPS RINFOG(icntl) 3112 3113 Level: beginner 3114 3115 References: 3116 . * - MUMPS Users' Guide 3117 3118 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3119 @*/ 3120 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3121 { 3122 PetscFunctionBegin; 3123 PetscValidType(F, 1); 3124 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3125 PetscValidRealPointer(val, 3); 3126 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3127 PetscFunctionReturn(PETSC_SUCCESS); 3128 } 3129 3130 /*@ 3131 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3132 3133 Logically Collective 3134 3135 Input Parameter: 3136 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3137 3138 Output Parameters: 3139 + size - local size of the array. The size of the array is non-zero only on the host. 3140 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3141 for freeing this array. 3142 3143 Level: beginner 3144 3145 References: 3146 . 
* - MUMPS Users' Guide 3147 3148 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3149 @*/ 3150 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3151 { 3152 PetscFunctionBegin; 3153 PetscValidType(F, 1); 3154 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3155 PetscValidIntPointer(size, 3); 3156 PetscValidPointer(array, 4); 3157 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3158 PetscFunctionReturn(PETSC_SUCCESS); 3159 } 3160 3161 /*MC 3162 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3163 distributed and sequential matrices via the external package MUMPS. 3164 3165 Works with `MATAIJ` and `MATSBAIJ` matrices 3166 3167 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3168 3169 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3170 See details below. 3171 3172 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3173 3174 Options Database Keys: 3175 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3176 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 3177 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3178 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3179 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3180 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3181 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3182 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3183 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3184 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3185 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3186 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3187 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3188 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3189 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3190 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3191 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3192 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3193 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3194 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3195 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3196 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 3197 . 
-mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3198 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3199 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3200 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3201 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3202 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3203 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3204 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3205 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3206 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3207 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3208 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3209 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3210 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3211 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3212 3213 Level: beginner 3214 3215 Notes: 3216 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 3217 error if the matrix is Hermitian. 3218 3219 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3220 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3221 3222 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3223 the failure with 3224 .vb 3225 KSPGetPC(ksp,&pc); 3226 PCFactorGetMatrix(pc,&mat); 3227 MatMumpsGetInfo(mat,....); 3228 MatMumpsGetInfog(mat,....); etc. 3229 .ve 3230 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3231 3232 MUMPS provides 64-bit integer support in two build modes: 3233 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3234 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3235 3236 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3237 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3238 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3239 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3240 3241 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3242 3243 Two modes to run MUMPS/PETSc with OpenMP 3244 .vb 3245 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. 
For example, if you want to have 16 OpenMP 3246 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3247 .ve 3248 3249 .vb 3250 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3251 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3252 .ve 3253 3254 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3255 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3256 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3257 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3258 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 3259 3260 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3261 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3262 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3263 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 3264 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3265 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 3266 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3267 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3268 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3269 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3270 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3271 examine the mapping result. 3272 3273 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3274 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3275 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3276 3277 References: 3278 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3279 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 
3280 3281 .seealso: [](chapter_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3282 M*/ 3283 3284 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3285 { 3286 PetscFunctionBegin; 3287 *type = MATSOLVERMUMPS; 3288 PetscFunctionReturn(PETSC_SUCCESS); 3289 } 3290 3291 /* MatGetFactor for Seq and MPI AIJ matrices */ 3292 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3293 { 3294 Mat B; 3295 Mat_MUMPS *mumps; 3296 PetscBool isSeqAIJ; 3297 PetscMPIInt size; 3298 3299 PetscFunctionBegin; 3300 #if defined(PETSC_USE_COMPLEX) 3301 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3302 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3303 *F = NULL; 3304 PetscFunctionReturn(PETSC_SUCCESS); 3305 } 3306 #endif 3307 /* Create the factorization matrix */ 3308 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3309 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3310 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3311 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3312 PetscCall(MatSetUp(B)); 3313 3314 PetscCall(PetscNew(&mumps)); 3315 3316 B->ops->view = MatView_MUMPS; 3317 B->ops->getinfo = MatGetInfo_MUMPS; 3318 3319 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3320 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3321 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3322 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3323 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3324 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3325 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3326 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3327 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3328 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3329 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3330 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3331 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3332 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3333 3334 if (ftype == MAT_FACTOR_LU) { 3335 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3336 B->factortype = MAT_FACTOR_LU; 3337 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3338 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3339 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char 
**)&B->preferredordering[MAT_FACTOR_LU])); 3340 mumps->sym = 0; 3341 } else { 3342 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3343 B->factortype = MAT_FACTOR_CHOLESKY; 3344 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3345 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3346 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3347 #if defined(PETSC_USE_COMPLEX) 3348 mumps->sym = 2; 3349 #else 3350 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3351 else mumps->sym = 2; 3352 #endif 3353 } 3354 3355 /* set solvertype */ 3356 PetscCall(PetscFree(B->solvertype)); 3357 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3358 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3359 if (size == 1) { 3360 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3361 B->canuseordering = PETSC_TRUE; 3362 } 3363 B->ops->destroy = MatDestroy_MUMPS; 3364 B->data = (void *)mumps; 3365 3366 *F = B; 3367 mumps->id.job = JOB_NULL; 3368 mumps->ICNTL_pre = NULL; 3369 mumps->CNTL_pre = NULL; 3370 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3371 PetscFunctionReturn(PETSC_SUCCESS); 3372 } 3373 3374 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3375 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3376 { 3377 Mat B; 3378 Mat_MUMPS *mumps; 3379 PetscBool isSeqSBAIJ; 3380 PetscMPIInt size; 3381 3382 PetscFunctionBegin; 3383 #if defined(PETSC_USE_COMPLEX) 3384 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3385 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3386 *F = NULL; 3387 PetscFunctionReturn(PETSC_SUCCESS); 3388 } 3389 #endif 3390 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3391 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3392 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3393 PetscCall(MatSetUp(B)); 3394 3395 PetscCall(PetscNew(&mumps)); 3396 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3397 if (isSeqSBAIJ) { 3398 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3399 } else { 3400 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3401 } 3402 3403 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3404 B->ops->view = MatView_MUMPS; 3405 B->ops->getinfo = MatGetInfo_MUMPS; 3406 3407 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3408 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3409 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3410 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3411 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3412 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3413 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3414 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3415 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3416 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3417 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3418 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3419 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3420 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3421 3422 B->factortype = MAT_FACTOR_CHOLESKY; 3423 #if defined(PETSC_USE_COMPLEX) 3424 mumps->sym = 2; 3425 #else 3426 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3427 else mumps->sym = 2; 3428 #endif 3429 3430 /* set solvertype */ 3431 PetscCall(PetscFree(B->solvertype)); 3432 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3433 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3434 if (size == 1) { 3435 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3436 B->canuseordering = PETSC_TRUE; 3437 } 3438 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3439 B->ops->destroy = MatDestroy_MUMPS; 3440 B->data = (void *)mumps; 3441 3442 *F = B; 3443 mumps->id.job = JOB_NULL; 3444 mumps->ICNTL_pre = NULL; 3445 mumps->CNTL_pre = NULL; 3446 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3447 PetscFunctionReturn(PETSC_SUCCESS); 3448 } 3449 3450 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3451 { 3452 Mat B; 3453 Mat_MUMPS *mumps; 3454 PetscBool isSeqBAIJ; 3455 PetscMPIInt size; 
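  /* Note: only LU is supported for BAIJ matrices with MUMPS; for Cholesky the matrix must be
     provided as SBAIJ or AIJ (enforced by the error check below) */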
3456 3457 PetscFunctionBegin; 3458 /* Create the factorization matrix */ 3459 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3460 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3461 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3462 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3463 PetscCall(MatSetUp(B)); 3464 3465 PetscCall(PetscNew(&mumps)); 3466 if (ftype == MAT_FACTOR_LU) { 3467 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3468 B->factortype = MAT_FACTOR_LU; 3469 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3470 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3471 mumps->sym = 0; 3472 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3473 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3474 3475 B->ops->view = MatView_MUMPS; 3476 B->ops->getinfo = MatGetInfo_MUMPS; 3477 3478 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3479 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3480 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3481 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3482 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3483 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3484 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3485 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3486 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3487 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3488 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3489 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3490 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3491 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3492 3493 /* set solvertype */ 3494 PetscCall(PetscFree(B->solvertype)); 3495 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3496 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3497 if (size == 1) { 3498 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3499 B->canuseordering = PETSC_TRUE; 3500 } 3501 B->ops->destroy = MatDestroy_MUMPS; 3502 B->data = (void *)mumps; 3503 3504 *F = B; 3505 mumps->id.job = JOB_NULL; 3506 mumps->ICNTL_pre = NULL; 3507 mumps->CNTL_pre = NULL; 3508 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3509 PetscFunctionReturn(PETSC_SUCCESS); 3510 } 3511 3512 /* MatGetFactor for Seq and MPI SELL matrices */ 3513 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3514 { 3515 Mat B; 3516 Mat_MUMPS *mumps; 3517 PetscBool isSeqSELL; 3518 PetscMPIInt size; 3519 
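  /* Note: only sequential SELL matrices with LU are currently supported; MPI SELL and Cholesky
     factorizations error out below as not yet implemented */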
3520 PetscFunctionBegin; 3521 /* Create the factorization matrix */ 3522 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3523 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3524 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3525 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3526 PetscCall(MatSetUp(B)); 3527 3528 PetscCall(PetscNew(&mumps)); 3529 3530 B->ops->view = MatView_MUMPS; 3531 B->ops->getinfo = MatGetInfo_MUMPS; 3532 3533 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3534 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3535 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3536 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3537 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3538 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3539 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3540 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3541 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3542 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3543 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3544 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3545 3546 if (ftype == MAT_FACTOR_LU) { 3547 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3548 B->factortype = MAT_FACTOR_LU; 3549 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3550 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3551 mumps->sym = 0; 3552 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3553 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3554 3555 /* set solvertype */ 3556 PetscCall(PetscFree(B->solvertype)); 3557 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3558 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3559 if (size == 1) { 3560 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3561 B->canuseordering = PETSC_TRUE; 3562 } 3563 B->ops->destroy = MatDestroy_MUMPS; 3564 B->data = (void *)mumps; 3565 3566 *F = B; 3567 mumps->id.job = JOB_NULL; 3568 mumps->ICNTL_pre = NULL; 3569 mumps->CNTL_pre = NULL; 3570 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3571 PetscFunctionReturn(PETSC_SUCCESS); 3572 } 3573 3574 /* MatGetFactor for MATNEST matrices */ 3575 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3576 { 3577 Mat B, **mats; 3578 Mat_MUMPS *mumps; 3579 PetscInt nr, nc; 3580 PetscMPIInt size; 3581 PetscBool flg = PETSC_TRUE; 3582 3583 PetscFunctionBegin; 3584 #if defined(PETSC_USE_COMPLEX) 3585 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3586 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not 
supported. Use MAT_FACTOR_LU instead.\n")); 3587 *F = NULL; 3588 PetscFunctionReturn(PETSC_SUCCESS); 3589 } 3590 #endif 3591 3592 /* Return if some condition is not satisfied */ 3593 *F = NULL; 3594 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3595 if (ftype == MAT_FACTOR_CHOLESKY) { 3596 IS *rows, *cols; 3597 PetscInt *m, *M; 3598 3599 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3600 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3601 PetscCall(MatNestGetISs(A, rows, cols)); 3602 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3603 if (!flg) { 3604 PetscCall(PetscFree2(rows, cols)); 3605 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 3606 PetscFunctionReturn(PETSC_SUCCESS); 3607 } 3608 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3609 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3610 for (PetscInt r = 0; flg && r < nr; r++) 3611 for (PetscInt k = r + 1; flg && k < nr; k++) 3612 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3613 PetscCall(PetscFree2(m, M)); 3614 PetscCall(PetscFree2(rows, cols)); 3615 if (!flg) { 3616 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 3617 PetscFunctionReturn(PETSC_SUCCESS); 3618 } 3619 } 3620 3621 for (PetscInt r = 0; r < nr; r++) { 3622 for (PetscInt c = 0; c < nc; c++) { 3623 Mat sub = mats[r][c]; 3624 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans; 3625 3626 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3627 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 3628 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 3629 else { 3630 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 3631 if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 3632 } 3633 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3634 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3635 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3636 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3637 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3638 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3639 if (ftype == MAT_FACTOR_CHOLESKY) { 3640 if (r == c && !isSeqAIJ && !isMPIAIJ && !isSeqSBAIJ && !isMPISBAIJ) { 3641 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3642 flg = PETSC_FALSE; 3643 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3644 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3645 flg = PETSC_FALSE; 3646 } 3647 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3648 PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3649 flg = PETSC_FALSE; 3650 } 3651 } 3652 } 3653 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3654 3655 /* Create the factorization matrix */ 3656 
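  /* All sub-blocks passed the type/layout checks above, so the whole MATNEST can be flattened
     into MUMPS triples via MatConvertToTriples_nest_xaij (set further below) */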
PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3657 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3658 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3659 PetscCall(MatSetUp(B)); 3660 3661 PetscCall(PetscNew(&mumps)); 3662 3663 B->ops->view = MatView_MUMPS; 3664 B->ops->getinfo = MatGetInfo_MUMPS; 3665 3666 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3667 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3668 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3669 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3670 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3671 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3672 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3673 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3674 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3675 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3676 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3677 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3678 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3679 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3680 3681 if (ftype == MAT_FACTOR_LU) { 3682 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3683 B->factortype = MAT_FACTOR_LU; 3684 mumps->sym = 0; 3685 } else { 3686 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3687 B->factortype = MAT_FACTOR_CHOLESKY; 3688 #if defined(PETSC_USE_COMPLEX) 3689 mumps->sym = 2; 3690 #else 3691 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3692 else mumps->sym = 2; 3693 #endif 3694 } 3695 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3696 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3697 3698 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3699 if (size == 1) { 3700 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3701 B->canuseordering = PETSC_TRUE; 3702 } 3703 3704 /* set solvertype */ 3705 PetscCall(PetscFree(B->solvertype)); 3706 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3707 B->ops->destroy = MatDestroy_MUMPS; 3708 B->data = (void *)mumps; 3709 3710 *F = B; 3711 mumps->id.job = JOB_NULL; 3712 mumps->ICNTL_pre = NULL; 3713 mumps->CNTL_pre = NULL; 3714 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3715 PetscFunctionReturn(PETSC_SUCCESS); 3716 } 3717 3718 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3719 { 3720 PetscFunctionBegin; 3721 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3722 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3723 
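  /* The remaining registrations cover BAIJ (a Cholesky request errors inside MatGetFactor_baij_mumps),
     SBAIJ (Cholesky), SELL (sequential LU only), and NEST (LU and Cholesky) */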
PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3724 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3725 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3726 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3727 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3728 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3729 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3730 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3731 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3732 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3733 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 3734 PetscFunctionReturn(PETSC_SUCCESS); 3735 } 3736
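
/*
   Usage sketch (not compiled): selecting MUMPS from a KSP and querying its diagnostics.
   This is a minimal illustration assuming a user program with an already assembled matrix A
   and vectors b, x; it only uses routines documented in this file or elsewhere in PETSc
   (KSPGetPC(), PCFactorSetMatSolverType(), PCFactorSetUpMatSolverType(), PCFactorGetMatrix(),
   MatMumpsSetIcntl(), MatMumpsGetInfog(), MatMumpsGetNullPivots()), and mirrors the options
   route -pc_type lu -pc_factor_mat_solver_type mumps described in the MATSOLVERMUMPS manual page.

     KSP      ksp;
     PC       pc;
     Mat      F;
     PetscInt nnull, nrows, *rows;

     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));                           // or PCCHOLESKY for symmetric problems
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
     PetscCall(PCFactorSetUpMatSolverType(pc));                // create the factor matrix F now so ICNTL can be set before factorization
     PetscCall(PCFactorGetMatrix(pc, &F));
     PetscCall(MatMumpsSetIcntl(F, 24, 1));                    // enable null-pivot detection, same as -mat_mumps_icntl_24 1
     PetscCall(KSPSolve(ksp, b, x));

     PetscCall(MatMumpsGetInfog(F, 28, &nnull));               // INFOG(28): number of null pivots encountered
     PetscCall(MatMumpsGetNullPivots(F, &nrows, &rows));       // 0-based rows, non-empty only on the host; caller frees
     PetscCall(PetscFree(rows));
     PetscCall(KSPDestroy(&ksp));
*/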