/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

#define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <cmumps_c.h>
  #else
    #include <zmumps_c.h>
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <smumps_c.h>
  #else
    #include <dmumps_c.h>
  #endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c cmumps_c
  #else
    #define MUMPS_c zmumps_c
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c smumps_c
  #else
    #define MUMPS_c dmumps_c
  #endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   numbers of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(PETSC_SUCCESS);
}
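/* Example (illustrative, assuming a 64-bit-indices build): the cast is an overflow-checked narrowing,
     PetscMUMPSInt m;
     PetscCall(PetscMUMPSIntCast((PetscInt)42, &m)); // succeeds, m == 42
   while any value above PETSC_MUMPS_INT_MAX (or below PETSC_MUMPS_INT_MIN) triggers the
   PETSC_ERR_ARG_OUTOFRANGE assertion above instead of silently truncating. */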
/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;

  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

/* If using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
           to each process, so we only Bcast info[1], an error code, and leave the others (since they do not have \
           an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
           omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
        */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MumpsScalar mumps_complex
  #else
    #define MumpsScalar mumps_double_complex
  #endif
#else
  #define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]
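/* Example (illustrative): mumps->id.ICNTL(24) = 1 expands to mumps->id.icntl[23] = 1, so control
   parameters can be read and written with the same 1-based numbering used in the MUMPS users' manual. */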
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
  #else
  ZMUMPS_STRUC_C id;
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
  #else
  DMUMPS_STRUC_C id;
  #endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscInt64  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller under which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
  PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master;        /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] is the column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices.
                                  Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
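/* Sketch (illustrative) of the reduced-RHS layout used by the Schur routines below: id.redrhs holds
   a size_schur x nrhs array stored column by column with leading dimension id.lredrhs;
   MatMumpsSolveSchur_Private() overwrites it in place with the solution of the Schur system
   (the transposed system when ICNTL(9) == 0). */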
/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
  } else { /* prepare for the expansion step */
    /* solve the Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A     - matrix in aij, baij, or sbaij format
    shift - 0: C style output triple; 1: Fortran style output triple.
    reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
            MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn). This is not ideal code; because v is ONLY sometimes part of mumps->irn, the
  PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/
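/* Example (illustrative): with shift = 1 (Fortran-style indices), the 2x2 sequential AIJ matrix
     [1 2]
     [0 3]
   yields nnz = 3 and the triples r = {1, 1, 2}, c = {1, 2, 2}, v = {1, 2, 3}. */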
static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  } else PetscCall(PetscArraycpy(mumps->val, av, aa->nz));
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscInt64     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices];
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else PetscCall(PetscArraycpy(mumps->val, a->val, nz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscInt64      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatGetBlockSize(A, &bs));
    M  = A->rmap->N / bs;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = aa->a;
  } else PetscCall(PetscArraycpy(mumps->val, aa->a, nz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
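/* Illustrative note: in the BAIJ conversion above, one bs x bs block stored at block-row i and
   block-column ajj[k] expands into bs2 triples covering point rows i*bs .. i*bs+bs-1 and point
   columns bs*ajj[k] .. bs*ajj[k]+bs-1 (plus shift); e.g. bs = 2 yields 4 triples per block. */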
static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscInt64      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool isset, hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscInt64 alloc_size = aa->nz * bs2;

    PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
    if (bs > 1) {
      PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs > 1) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (ajj[j] > i || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
              }
              val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
  } else PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX */
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j;
  const PetscScalar *av, *v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
  PetscBool          missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i + 1] - adiag[i];
        }
      }
    } else {
      for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) val[nz++] = v1[j];
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) val[nz++] = v1[j];
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
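/* Illustrative note for the MPI converters below: entries of the diagonal block (A) carry local
   column indices that are made global with the owning range (rstart/cstart), while entries of the
   off-diagonal block (B) are mapped to global columns through garray[]. */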
static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
  PetscInt           bs;
  PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A, &bs));
  rstart = A->rmap->rstart;
  ai     = aa->i;
  aj     = aa->j;
  bi     = bb->i;
  bj     = bb->j;
  av     = aa->a;
  bv     = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i] * bs2;
    v2     = bv + bi[i] * bs2;

    if (bs > 1) {
      /* A-part */
      for (j = 0; j < countA; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (rstart + ajj[j] * bs > irow || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs];
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
            }
            val[jj++] = v2[j * bs2 + m + k * bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j = 0; j < countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscInt64         rstart, cstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)(Ad)->data;
  bb = (Mat_SeqAIJ *)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;
  cstart = A->cmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
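/* Illustrative note: the AIJ-to-SBAIJ conversion below keeps only the upper-triangular part of the
   local rows: diagonal-block (A) entries from the stored diagonal onward, and off-diagonal-block (B)
   entries whose global column garray[bjj[j]] exceeds rstart. */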
static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa    = (Mat_SeqAIJ *)(Ad)->data;
  bb    = (Mat_SeqAIJ *)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i = 0; i < m; i++) {
      nza += (ai[i + 1] - adiag[i]);
      countB = bi[i + 1] - bi[i];
      bjj    = bj + bi[i];
      for (j = 0; j < countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of upper triangular part of mat */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i + 1] - adiag[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt     M = A->rmap->n;
  PetscInt64         i;
  PetscMUMPSInt     *row, *col;
  Vec                v;

  PetscFunctionBegin;
  PetscCall(MatDiagonalGetDiagonal(A, &v));
  PetscCall(VecGetArrayRead(v, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(M, &row, M, &col));
    for (i = 0; i < M; i++) {
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
      col[i] = row[i];
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = M;
  } else PetscCall(PetscArraycpy(mumps->val, av, M));
  PetscCall(VecRestoreArrayRead(v, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
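/* Illustrative note: for a 2x2 MATNEST [[A00, A01], [NULL, A11]], the converter below runs a
   per-block MatConvertToTriples_* routine, remaps each block's local indices to global rows and
   columns through the nest ISs, and concatenates everything into one COO list; for Cholesky
   (mumps->sym != 0) only blocks with c >= r are visited. */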
static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscInt64     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag;
          MatInfo   info;

          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
          if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
          else {
            PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
            if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
          }
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));

          if (chol) {
            if (r == c) {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            } else {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle row and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub  = mats[r][c];
        const PetscInt *ridx = rows_idx[r];
        const PetscInt *cidx = cols_idx[c];
        PetscInt        rst;
        PetscSF         csf;
        PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
        PetscLayout     cmap;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        swap = (PetscBool)(isTrans || isHTrans);

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps data structure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of row and column indices for transposed blocks
           since we need values with the global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF, but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64-bit integer builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use the direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to the full COO */
        PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
        if (isHTrans) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]);
        }

        /* Shift the new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for the final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool isTrans, isHTrans = PETSC_FALSE;
        Mat       sub  = mats[r][c];
        PetscInt  midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (isHTrans) {
          PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
        }
      }
    }
    mumps->val = oval;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to the start of the RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar *)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when the current nrhs is bigger than any we have met before */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* directly use rhs_loc[] as recvbuf; a single rhs is common in Ax = b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* copy and re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. "
                                   "Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
                                   "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
                                   &cite2));

  if (A->factorerrortype) {
    PetscCall(PetscInfo(A, "MatSolve is called with a singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscCall(VecSetInf(x));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b, &rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq, &array));
        mumps->id.rhs = (MumpsScalar *)array;
      }
    }
  } else { /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    PetscCall(VecCopy(b, x));
    PetscCall(VecGetArray(x, &array));
    mumps->id.rhs = (MumpsScalar *)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
      mumps->id.ICNTL(26) = 1; /* condensation phase */
    } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }
  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
  else if (mumps->id.ICNTL(26) == 1) {
    PetscCall(MatMumpsSolveSchur_Private(A));
    for (i = 0; i < mumps->id.size_schur; ++i) {
#if !defined(PETSC_USE_COMPLEX)
      PetscScalar val = mumps->id.redrhs[i];
#else
      PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i;
#endif
      array[mumps->id.listvar_schur[i] - 1] = val;
    }
  }

  if (mumps->petsc_size > 1) { /* convert the mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of sol_loc may change (but not its size, lsol_loc), so we recreate scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
= NULL; 1468 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1469 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1470 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1471 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1472 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1473 PetscCall(ISDestroy(&is_iden)); 1474 PetscCall(ISDestroy(&is_petsc)); 1475 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1476 } 1477 1478 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1479 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1480 } 1481 1482 if (mumps->petsc_size > 1) { 1483 if (mumps->ICNTL20 == 10) { 1484 PetscCall(VecRestoreArrayRead(b, &rarray)); 1485 } else if (!mumps->myid) { 1486 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1487 } 1488 } else PetscCall(VecRestoreArray(x, &array)); 1489 1490 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1491 PetscFunctionReturn(PETSC_SUCCESS); 1492 } 1493 1494 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1495 { 1496 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1497 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1498 1499 PetscFunctionBegin; 1500 mumps->id.ICNTL(9) = 0; 1501 PetscCall(MatSolve_MUMPS(A, b, x)); 1502 mumps->id.ICNTL(9) = value; 1503 PetscFunctionReturn(PETSC_SUCCESS); 1504 } 1505 1506 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1507 { 1508 Mat Bt = NULL; 1509 PetscBool denseX, denseB, flg, flgT; 1510 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1511 PetscInt i, nrhs, M; 1512 PetscScalar *array; 1513 const PetscScalar *rbray; 1514 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1515 PetscMUMPSInt *isol_loc, *isol_loc_save; 1516 PetscScalar *bray, *sol_loc, *sol_loc_save; 1517 IS is_to, is_from; 1518 PetscInt k, proc, j, m, myrstart; 1519 const PetscInt *rstart; 1520 Vec v_mpi, msol_loc; 1521 VecScatter scat_sol; 1522 Vec b_seq; 1523 VecScatter scat_rhs; 1524 PetscScalar *aa; 1525 PetscInt spnr, *ia, *ja; 1526 Mat_MPIAIJ *b = NULL; 1527 1528 PetscFunctionBegin; 1529 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1530 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1531 1532 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1533 if (denseB) { 1534 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1535 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1536 } else { /* sparse B */ 1537 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1538 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1539 if (flgT) { /* input B is transpose of actual RHS matrix, 1540 because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 1541 PetscCall(MatTransposeGetMat(B, &Bt)); 1542 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1543 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1544 } 1545 1546 PetscCall(MatGetSize(B, &M, &nrhs)); 1547 mumps->id.nrhs = nrhs; 1548 mumps->id.lrhs = M; 1549 mumps->id.rhs = NULL; 1550 1551 if (mumps->petsc_size == 1) { 1552 PetscScalar *aa; 1553 PetscInt spnr, *ia, *ja; 1554 PetscBool second_solve = PETSC_FALSE; 1555 1556 PetscCall(MatDenseGetArray(X, &array)); 1557 mumps->id.rhs = (MumpsScalar *)array; 1558 1559 if (denseB) { 1560 /* copy B to X */ 1561 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1562 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1563 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1564 } else { /* sparse B */ 1565 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1566 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1567 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1568 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1569 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1570 } 1571 /* handle condensation step of Schur complement (if any) */ 1572 if (mumps->id.size_schur > 0) { 1573 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1574 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1575 second_solve = PETSC_TRUE; 1576 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1577 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1578 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1579 } 1580 /* solve phase */ 1581 mumps->id.job = JOB_SOLVE; 1582 PetscMUMPS_c(mumps); 1583 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1584 1585 /* handle expansion step of Schur complement (if any) */ 1586 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1587 else if (mumps->id.ICNTL(26) == 1) { 1588 PetscCall(MatMumpsSolveSchur_Private(A)); 1589 for (j = 0; j < nrhs; ++j) 1590 for (i = 0; i < mumps->id.size_schur; ++i) { 1591 #if !defined(PETSC_USE_COMPLEX) 1592 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1593 #else 1594 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1595 #endif 1596 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1597 } 1598 } 1599 if (!denseB) { /* sparse B */ 1600 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1601 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1602 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1603 } 1604 PetscCall(MatDenseRestoreArray(X, &array)); 1605 PetscFunctionReturn(PETSC_SUCCESS); 1606 } 1607 1608 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
  */
  PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold the MUMPS local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
  mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B, &rbray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
      PetscCall(MatDenseRestoreArrayRead(B, &rbray));
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below is not implemented
         with an MPI_Gather, resulting in very inefficient communication. An optimization is to use
         VecScatterCreateToZero to gather B to rank 0, and then, on rank 0, re-arrange B into the desired
         order, which is a local operation.
      */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports a centralized rhs */
      /* wrap the dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(MatDenseGetArray(B, &bray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
      PetscCall(MatDenseRestoreArray(B, &bray));

      /* scatter v_mpi to b_seq on proc[0]. MUMPS requires the rhs to be centralized on the host!
*/ 1646 if (!mumps->myid) { 1647 PetscInt *idx; 1648 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1649 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1650 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1651 k = 0; 1652 for (proc = 0; proc < mumps->petsc_size; proc++) { 1653 for (j = 0; j < nrhs; j++) { 1654 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1655 } 1656 } 1657 1658 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1659 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1660 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1661 } else { 1662 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1663 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1664 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1665 } 1666 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1667 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1668 PetscCall(ISDestroy(&is_to)); 1669 PetscCall(ISDestroy(&is_from)); 1670 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1671 1672 if (!mumps->myid) { /* define rhs on the host */ 1673 PetscCall(VecGetArray(b_seq, &bray)); 1674 mumps->id.rhs = (MumpsScalar *)bray; 1675 PetscCall(VecRestoreArray(b_seq, &bray)); 1676 } 1677 } 1678 } else { /* sparse B */ 1679 b = (Mat_MPIAIJ *)Bt->data; 1680 1681 /* wrap dense X into a vector v_mpi */ 1682 PetscCall(MatGetLocalSize(X, &m, NULL)); 1683 PetscCall(MatDenseGetArray(X, &bray)); 1684 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1685 PetscCall(MatDenseRestoreArray(X, &bray)); 1686 1687 if (!mumps->myid) { 1688 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1689 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1690 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1691 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1692 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1693 } else { 1694 mumps->id.irhs_ptr = NULL; 1695 mumps->id.irhs_sparse = NULL; 1696 mumps->id.nz_rhs = 0; 1697 mumps->id.rhs_sparse = NULL; 1698 } 1699 } 1700 1701 /* solve phase */ 1702 mumps->id.job = JOB_SOLVE; 1703 PetscMUMPS_c(mumps); 1704 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1705 1706 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1707 PetscCall(MatDenseGetArray(X, &array)); 1708 PetscCall(VecPlaceArray(v_mpi, array)); 1709 1710 /* create scatter scat_sol */ 1711 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1712 /* iidx: index for scatter mumps solution to petsc X */ 1713 1714 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1715 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1716 for (i = 0; i < lsol_loc; i++) { 1717 isol_loc[i] -= 1; /* change Fortran style to C style. 
sol_loc[i + j * lsol_loc] contains the value of x[isol_loc[i]] for the j-th right-hand side */

    for (proc = 0; proc < mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;          /* local index on the 1st column of the petsc vector X */
        iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to the petsc index in X */
        m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
  PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
  PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X, &array));

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  PetscCall(PetscFree2(sol_loc, isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ *)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
      PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))));
  PetscFunctionReturn(PETSC_SUCCESS);
}
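/* The two wrappers below implement transpose solves by toggling ICNTL(9), which selects
   between solving A x = b (ICNTL(9) = 1, the default) and A^T x = b (any other value).
   A minimal user-level sketch (the names pc, F, b, x are illustrative, not from this file):

     Mat F;
     Vec b, x;
     PetscCall(PCFactorGetMatrix(pc, &F));  // MUMPS factor held by a PCLU/PCCHOLESKY
     PetscCall(MatSolveTranspose(F, b, x)); // dispatches to MatSolveTranspose_MUMPS(),
                                            // which sets ICNTL(9)=0 and restores it after
*/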
static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
{
  Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
  const PetscMUMPSInt value = mumps->id.ICNTL(9);

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatMatSolve_MUMPS(A, B, X));
  mumps->id.ICNTL(9) = value;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
{
  PetscBool flg;
  Mat       B;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be a MATAIJ matrix");

  /* Create B = Bt^T that uses Bt's data structure */
  PetscCall(MatCreateTranspose(Bt, &B));

  PetscCall(MatMatSolve_MUMPS(A, B, X));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
{
  Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for a correct global matrix inertia", mumps->id.ICNTL(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscInt       i, nreqs;
  PetscMUMPSInt *irn, *jcn;
  PetscMPIInt    count;
  PetscInt64     totnnz, remain;
  const PetscInt osize = mumps->omp_comm_size;
  PetscScalar   *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
      PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));

      /* then each rank computes its number of sends/recvs */
      if (mumps->is_omp_master) {
        /* start from 1 since self-communication is not done through MPI */
        nreqs = 0;
        for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
      } else {
        nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
      }
      PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* triple the requests since we send irn, jcn and val separately */

      /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
         MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
         might even be a prime number > 2^31-1, we have to slice the messages. Note omp_comm_size
         is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
1853 */ 1854 nreqs = 0; /* counter for actual send/recvs */ 1855 if (mumps->is_omp_master) { 1856 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1857 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1858 PetscCall(PetscMalloc1(totnnz, &val)); 1859 1860 /* Self communication */ 1861 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1862 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1863 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1864 1865 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1866 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1867 PetscCall(PetscFree(mumps->val_alloc)); 1868 mumps->nnz = totnnz; 1869 mumps->irn = irn; 1870 mumps->jcn = jcn; 1871 mumps->val = mumps->val_alloc = val; 1872 1873 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1874 jcn += mumps->recvcount[0]; 1875 val += mumps->recvcount[0]; 1876 1877 /* Remote communication */ 1878 for (i = 1; i < osize; i++) { 1879 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1880 remain = mumps->recvcount[i] - count; 1881 while (count > 0) { 1882 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1883 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1884 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1885 irn += count; 1886 jcn += count; 1887 val += count; 1888 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1889 remain -= count; 1890 } 1891 } 1892 } else { 1893 irn = mumps->irn; 1894 jcn = mumps->jcn; 1895 val = mumps->val; 1896 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1897 remain = mumps->nnz - count; 1898 while (count > 0) { 1899 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1900 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1901 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1902 irn += count; 1903 jcn += count; 1904 val += count; 1905 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1906 remain -= count; 1907 } 1908 } 1909 } else { 1910 nreqs = 0; 1911 if (mumps->is_omp_master) { 1912 val = mumps->val + mumps->recvcount[0]; 1913 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1914 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1915 remain = mumps->recvcount[i] - count; 1916 while (count > 0) { 1917 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1918 val += count; 1919 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1920 remain -= count; 1921 } 1922 } 1923 } else { 1924 val = mumps->val; 1925 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1926 remain = mumps->nnz - count; 1927 while (count > 0) { 1928 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1929 val += count; 1930 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1931 remain -= count; 1932 } 1933 } 1934 } 1935 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1936 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1937 } 1938 PetscFunctionReturn(PETSC_SUCCESS); 1939 } 1940 1941 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const 
MatFactorInfo *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
  PetscBool  isMPIAIJ;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
    if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));

  /* numerical factorization phase */
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
  } else {
    mumps->id.a_loc = (MumpsScalar *)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
    if (mumps->id.INFOG(1) == -10) {
      PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
    } else if (mumps->id.INFOG(1) == -13) {
      PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
    } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
      PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
    } else {
      PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OTHER;
    }
  }
  PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16));

  F->assembled = PETSC_TRUE;

  if (F->schur) { /* reset Schur status to unfactored */
#if defined(PETSC_HAVE_CUDA)
    F->schur->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
    }
    PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
  }

  /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt     lsol_loc;
    PetscScalar *sol_loc;

    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));

    /* distributed solution; create x_seq = sol_loc for repeated use */
    if (mumps->x_seq) {
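      /* A previous factorization may have produced a different local solution size INFO(23),
         so discard the old scatter, the old sol_loc/isol_loc storage and the old x_seq before
         re-creating them below with the new lsol_loc. */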
2007 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2008 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2009 PetscCall(VecDestroy(&mumps->x_seq)); 2010 } 2011 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2012 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2013 mumps->id.lsol_loc = lsol_loc; 2014 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2015 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2016 } 2017 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 2018 PetscFunctionReturn(PETSC_SUCCESS); 2019 } 2020 2021 /* Sets MUMPS options from the options database */ 2022 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2023 { 2024 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2025 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2026 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2027 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2028 MumpsScalar *arr; 2029 2030 PetscFunctionBegin; 2031 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2032 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2033 PetscInt nthreads = 0; 2034 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2035 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2036 2037 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2038 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2039 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2040 2041 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2042 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2043 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2044 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2045 if (mumps->use_petsc_omp_support) { 2046 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2047 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2048 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2049 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2050 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2051 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2052 #endif 2053 } else { 2054 mumps->omp_comm = PETSC_COMM_SELF; 2055 mumps->mumps_comm = mumps->petsc_comm; 2056 mumps->is_omp_master = PETSC_TRUE; 2057 } 2058 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2059 mumps->reqs = NULL; 2060 mumps->tag = 0; 2061 2062 if (mumps->mumps_comm != MPI_COMM_NULL) { 2063 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2064 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
       */
      MPI_Comm comm;
      PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
      mumps->mumps_comm = comm;
    } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
  }

  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
  mumps->id.job          = JOB_INIT;
  mumps->id.par          = 1; /* host participates in factorization and solve */
  mumps->id.sym          = mumps->sym;

  size          = mumps->id.size_schur;
  arr           = mumps->id.schur;
  listvar_schur = mumps->id.listvar_schur;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
  /* restore cached ICNTL and CNTL values */
  for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
  for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));

  if (schur) {
    mumps->id.size_schur    = size;
    mumps->id.schur_lld     = size;
    mumps->id.schur         = arr;
    mumps->id.listvar_schur = listvar_schur;
    if (mumps->petsc_size > 1) {
      PetscBool gs; /* gs is false if any rank other than root has a non-empty IS */

      mumps->id.ICNTL(19) = 1; /* MUMPS returns the Schur complement centralized on the host */
      gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
      PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
    } else {
      if (F->factortype == MAT_FACTOR_LU) {
        mumps->id.ICNTL(19) = 3; /* MUMPS returns the full matrix */
      } else {
        mumps->id.ICNTL(19) = 2; /* MUMPS returns the lower triangular part */
      }
    }
    mumps->id.ICNTL(26) = -1;
  }

  /* copy the MUMPS default control values from the master to the slaves. Although the slaves do not call MUMPS, they may access these values in code.
     For example, ICNTL(9) is initialized to 1 by MUMPS and the slaves check ICNTL(9) in MatSolve_MUMPS.
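     As a concrete illustration (process counts hypothetical): with 8 MPI ranks and
     -mat_mumps_use_omp_threads 4, only the 2 master ranks call MUMPS, yet all 8 ranks
     consult id.ICNTL(9) in MatSolveTranspose_MUMPS(), hence the broadcasts below.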
2111 */ 2112 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2113 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2114 2115 mumps->scat_rhs = NULL; 2116 mumps->scat_sol = NULL; 2117 2118 /* set PETSc-MUMPS default options - override MUMPS default */ 2119 mumps->id.ICNTL(3) = 0; 2120 mumps->id.ICNTL(4) = 0; 2121 if (mumps->petsc_size == 1) { 2122 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2123 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2124 } else { 2125 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2126 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2127 } 2128 } 2129 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2130 if (flg) mumps->id.ICNTL(1) = icntl; 2131 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2132 if (flg) mumps->id.ICNTL(2) = icntl; 2133 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2134 if (flg) mumps->id.ICNTL(3) = icntl; 2135 2136 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2137 if (flg) mumps->id.ICNTL(4) = icntl; 2138 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2139 2140 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2141 if (flg) mumps->id.ICNTL(6) = icntl; 2142 2143 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2144 if (flg) { 2145 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2146 mumps->id.ICNTL(7) = icntl; 2147 } 2148 2149 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2150 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2151 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2152 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2153 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2154 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2155 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2156 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2157 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2158 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2159 if (flg) { 2160 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2161 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2162 } 2163 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2164 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2165 PetscCall(MatDestroy(&F->schur)); 2166 PetscCall(MatMumpsResetSchur_Private(mumps)); 2167 } 2168 2169 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2170 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2171 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2172 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2173 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2174 In short, we could not use distributed RHS with MPICH until v4.0b1. 
  */
#if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
  mumps->ICNTL20 = 0; /* centralized dense RHS */
#else
  mumps->ICNTL20 = 10; /* distributed dense RHS */
#endif
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
  PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
#if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
  PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
#endif
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vectors */

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
  if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by the PETSc API */
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
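  /* Illustrative use of ICNTL(33) (a sketch, not exercised by this file): after
     -mat_mumps_icntl_33 1 and a numerical factorization, the determinant can be recovered
     on the host as (RINFOG(12), RINFOG(13)) * 2^INFOG(34) through MatMumpsGetRinfog() and
     MatMumpsGetInfog(); MatView_MUMPS() below prints the same three quantities. */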
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));

  PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));

  PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));

  PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
  if (ninfo) {
    PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
    PetscCall(PetscMalloc1(ninfo, &mumps->info));
    mumps->ninfo = ninfo;
    for (i = 0; i < ninfo; i++) {
      PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
      mumps->info[i] = info[i];
    }
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0) {
    PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->id.INFOG(1) == -6) {
      PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
    } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
      PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
    } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
      PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n"));
    } else {
      PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d,
INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2245 F->factorerrortype = MAT_FACTOR_OTHER; 2246 } 2247 } 2248 PetscFunctionReturn(PETSC_SUCCESS); 2249 } 2250 2251 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2252 { 2253 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2254 Vec b; 2255 const PetscInt M = A->rmap->N; 2256 2257 PetscFunctionBegin; 2258 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2259 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2260 PetscFunctionReturn(PETSC_SUCCESS); 2261 } 2262 2263 /* Set MUMPS options from the options database */ 2264 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2265 2266 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2267 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2268 2269 /* analysis phase */ 2270 mumps->id.job = JOB_FACTSYMBOLIC; 2271 mumps->id.n = M; 2272 switch (mumps->id.ICNTL(18)) { 2273 case 0: /* centralized assembled matrix input */ 2274 if (!mumps->myid) { 2275 mumps->id.nnz = mumps->nnz; 2276 mumps->id.irn = mumps->irn; 2277 mumps->id.jcn = mumps->jcn; 2278 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2279 if (r) { 2280 mumps->id.ICNTL(7) = 1; 2281 if (!mumps->myid) { 2282 const PetscInt *idx; 2283 PetscInt i; 2284 2285 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2286 PetscCall(ISGetIndices(r, &idx)); 2287 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2288 PetscCall(ISRestoreIndices(r, &idx)); 2289 } 2290 } 2291 } 2292 break; 2293 case 3: /* distributed assembled matrix input (size>1) */ 2294 mumps->id.nnz_loc = mumps->nnz; 2295 mumps->id.irn_loc = mumps->irn; 2296 mumps->id.jcn_loc = mumps->jcn; 2297 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2298 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2299 PetscCall(MatCreateVecs(A, NULL, &b)); 2300 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2301 PetscCall(VecDestroy(&b)); 2302 } 2303 break; 2304 } 2305 PetscMUMPS_c(mumps); 2306 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2307 2308 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2309 F->ops->solve = MatSolve_MUMPS; 2310 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2311 F->ops->matsolve = MatMatSolve_MUMPS; 2312 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2313 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2314 2315 mumps->matstruc = SAME_NONZERO_PATTERN; 2316 PetscFunctionReturn(PETSC_SUCCESS); 2317 } 2318 2319 /* Note the Petsc r and c permutations are ignored */ 2320 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2321 { 2322 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2323 Vec b; 2324 const PetscInt M = A->rmap->N; 2325 2326 PetscFunctionBegin; 2327 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2328 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2329 PetscFunctionReturn(PETSC_SUCCESS); 2330 } 2331 2332 /* Set MUMPS options from the options database */ 2333 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2334 2335 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2336 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2337 2338 /* analysis phase */ 2339 mumps->id.job = JOB_FACTSYMBOLIC; 2340 mumps->id.n = M; 2341 switch (mumps->id.ICNTL(18)) { 2342 case 0: /* centralized assembled matrix input */ 2343 if (!mumps->myid) { 2344 mumps->id.nnz = mumps->nnz; 2345 mumps->id.irn = mumps->irn; 2346 mumps->id.jcn = mumps->jcn; 2347 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2348 } 2349 break; 2350 case 3: /* distributed assembled matrix input (size>1) */ 2351 mumps->id.nnz_loc = mumps->nnz; 2352 mumps->id.irn_loc = mumps->irn; 2353 mumps->id.jcn_loc = mumps->jcn; 2354 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2355 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2356 PetscCall(MatCreateVecs(A, NULL, &b)); 2357 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2358 PetscCall(VecDestroy(&b)); 2359 } 2360 break; 2361 } 2362 PetscMUMPS_c(mumps); 2363 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2364 2365 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2366 F->ops->solve = MatSolve_MUMPS; 2367 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2368 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2369 2370 mumps->matstruc = SAME_NONZERO_PATTERN; 2371 PetscFunctionReturn(PETSC_SUCCESS); 2372 } 2373 2374 /* Note the Petsc r permutation and factor info are ignored */ 2375 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2376 { 2377 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2378 Vec b; 2379 const PetscInt M = A->rmap->N; 2380 2381 PetscFunctionBegin; 2382 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2383 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2384 PetscFunctionReturn(PETSC_SUCCESS); 2385 } 2386 2387 /* Set MUMPS options from the options database */ 2388 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2389 2390 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2391 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2392 2393 /* analysis phase */ 2394 mumps->id.job = JOB_FACTSYMBOLIC; 2395 mumps->id.n = M; 2396 switch (mumps->id.ICNTL(18)) { 2397 case 0: /* centralized assembled matrix input */ 2398 if (!mumps->myid) { 2399 mumps->id.nnz = mumps->nnz; 2400 mumps->id.irn = mumps->irn; 2401 mumps->id.jcn = mumps->jcn; 2402 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2403 } 2404 break; 2405 case 3: /* distributed assembled matrix input (size>1) */ 2406 mumps->id.nnz_loc = mumps->nnz; 2407 mumps->id.irn_loc = mumps->irn; 2408 mumps->id.jcn_loc = mumps->jcn; 2409 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2410 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2411 PetscCall(MatCreateVecs(A, NULL, &b)); 2412 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2413 PetscCall(VecDestroy(&b)); 2414 } 2415 break; 2416 } 2417 PetscMUMPS_c(mumps); 2418 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2419 2420 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2421 F->ops->solve = MatSolve_MUMPS; 2422 F->ops->solvetranspose = MatSolve_MUMPS; 2423 F->ops->matsolve = MatMatSolve_MUMPS; 2424 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2425 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2426 #if defined(PETSC_USE_COMPLEX) 2427 F->ops->getinertia = NULL; 2428 #else 2429 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2430 #endif 2431 2432 mumps->matstruc = SAME_NONZERO_PATTERN; 2433 PetscFunctionReturn(PETSC_SUCCESS); 2434 } 2435 2436 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2437 { 2438 PetscBool iascii; 2439 PetscViewerFormat format; 2440 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2441 2442 PetscFunctionBegin; 2443 /* check if matrix is mumps type */ 2444 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2445 2446 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2447 if (iascii) { 2448 PetscCall(PetscViewerGetFormat(viewer, &format)); 2449 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2450 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2451 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2452 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2453 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2454 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2455 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2456 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2457 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2458 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2459 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2460 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2461 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2462 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2463 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2464 if (mumps->id.ICNTL(11) > 0) { 2465 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2466 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2467 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2468 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2469 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2470 
            PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
          }
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15)));
          /* ICNTL(16-17) not used */
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory that can be allocated locally): %d\n", mumps->id.ICNTL(23)));

          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29)));

          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors discarded in the solve phase): %d\n", mumps->id.ICNTL(31)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58)));

          PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7)));

          /* information local to each processor */
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n"));
          PetscCall(PetscViewerASCIIPushSynchronized(viewer));
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
          PetscCall(PetscViewerFlush(viewer));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n"));
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
          PetscCall(PetscViewerFlush(viewer));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n"));
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
          PetscCall(PetscViewerFlush(viewer));

          PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n"));
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
          PetscCall(PetscViewerFlush(viewer));

          PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n"));
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
          PetscCall(PetscViewerFlush(viewer));

          PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (number of pivots eliminated on this processor after factorization):\n"));
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
          PetscCall(PetscViewerFlush(viewer));

          if (mumps->ninfo && mumps->ninfo <= 80) {
            PetscInt i;
            for (i = 0; i < mumps->ninfo; i++) {
              PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
              PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
              PetscCall(PetscViewerFlush(viewer));
            }
          }
          PetscCall(PetscViewerASCIIPopSynchronized(viewer));
        } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
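        /* INFOG/RINFOG entries are global statistics; PetscMUMPS_c() broadcasts them within
           omp_comm after each MUMPS call, and MUMPS itself returns them identically on every
           rank of its communicator, so printing them from the host alone is sufficient. */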
((PetscObject)A)->prefix : "")); 2540 2541 if (mumps->myid == 0) { /* information from the host */ 2542 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2543 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2544 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2545 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2546 2547 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2548 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2549 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2550 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2551 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2552 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2553 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2554 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2555 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2556 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2557 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2558 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2559 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2560 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2561 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2562 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2563 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2564 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 
2565 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2566 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2567 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2568 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2569 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2570 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2571 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2572 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2573 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2574 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2575 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2576 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2577 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2578 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2579 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2580 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2581 } 2582 } 2583 } 2584 PetscFunctionReturn(PETSC_SUCCESS); 2585 } 2586 2587 static PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2588 { 2589 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2590 2591 PetscFunctionBegin; 2592 info->block_size = 1.0; 2593 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2594 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_unneeded = 0.0;
  info->assemblies = 0.0;
  info->mallocs = 0.0;
  info->memory = 0.0;
  info->fill_ratio_given = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  mumps->id.schur = (MumpsScalar *)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld = size;
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

  /* MUMPS expects Fortran style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always returns a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[j * N + i] = val;
        }
      }
    } else { /* stored by columns */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    }
  } else {                          /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = array[j * N + i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = array[j * N + i] = val;
        }
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) {                             /* not already cached */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ?
mumps->ICNTL_pre[0] : 0; 2725 *ival = 0; 2726 for (i = 0; i < nICNTL_pre; ++i) { 2727 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2728 } 2729 } else *ival = mumps->id.ICNTL(icntl); 2730 PetscFunctionReturn(PETSC_SUCCESS); 2731 } 2732 2733 /*@ 2734 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2735 2736 Logically Collective 2737 2738 Input Parameters: 2739 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2740 . icntl - index of MUMPS parameter array ICNTL() 2741 - ival - value of MUMPS ICNTL(icntl) 2742 2743 Options Database Key: 2744 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2745 2746 Level: beginner 2747 2748 References: 2749 . * - MUMPS Users' Guide 2750 2751 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2752 @*/ 2753 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2754 { 2755 PetscFunctionBegin; 2756 PetscValidType(F, 1); 2757 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2758 PetscValidLogicalCollectiveInt(F, icntl, 2); 2759 PetscValidLogicalCollectiveInt(F, ival, 3); 2760 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2761 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2762 PetscFunctionReturn(PETSC_SUCCESS); 2763 } 2764 2765 /*@ 2766 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2767 2768 Logically Collective 2769 2770 Input Parameters: 2771 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2772 - icntl - index of MUMPS parameter array ICNTL() 2773 2774 Output Parameter: 2775 . ival - value of MUMPS ICNTL(icntl) 2776 2777 Level: beginner 2778 2779 References: 2780 . * - MUMPS Users' Guide 2781 2782 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2783 @*/ 2784 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2785 { 2786 PetscFunctionBegin; 2787 PetscValidType(F, 1); 2788 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2789 PetscValidLogicalCollectiveInt(F, icntl, 2); 2790 PetscAssertPointer(ival, 3); 2791 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2792 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2793 PetscFunctionReturn(PETSC_SUCCESS); 2794 } 2795 2796 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2797 { 2798 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2799 2800 PetscFunctionBegin; 2801 if (mumps->id.job == JOB_NULL) { 2802 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0;
    for (i = 0; i < nCNTL_pre; ++i)
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
    if (i == nCNTL_pre) {
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
      mumps->CNTL_pre[0]++;
    }
    mumps->CNTL_pre[1 + 2 * i] = icntl;
    mumps->CNTL_pre[2 + 2 * i] = val;
  } else mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    *val = 0.0;
    for (i = 0; i < nCNTL_pre; ++i) {
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
    }
  } else *val = mumps->id.CNTL(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsSetCntl - Set MUMPS parameter CNTL()

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
. icntl - index of MUMPS parameter array CNTL()
- val   - value of MUMPS CNTL(icntl)

  Options Database Key:
. -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveReal(F, val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetCntl - Get MUMPS parameter CNTL()

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array CNTL()

  Output Parameter:
. val - value of MUMPS CNTL(icntl)

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfo = mumps->id.RINFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfog = mumps->id.RINFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
  *size  = 0;
  *array = NULL;
  if (!mumps->myid) {
    *size = mumps->id.INFOG(28);
    PetscCall(PetscMalloc1(*size, array));
    for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
{
  Mat          Bt = NULL, Btseq = NULL;
  PetscBool    flg;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *aa;
  PetscInt     spnr, *ia, *ja, M, nrhs;

  PetscFunctionBegin;
  PetscAssertPointer(spRHS, 2);
  PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
  if (flg) {
    PetscCall(MatTransposeGetMat(spRHS, &Bt));
  } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");

  PetscCall(MatMumpsSetIcntl(F, 30, 1));

  if (mumps->petsc_size > 1) {
    Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
    Btseq = b->A;
  } else {
    Btseq = Bt;
  }

  PetscCall(MatGetSize(spRHS, &M, &nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (!mumps->myid) {
    PetscCall(MatSeqAIJGetArray(Btseq, &aa));
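    /* Fetch the CSR row pointers and column indices of the sequential matrix Bt with a shift of 1, i.e., 1-based (Fortran) indexing, which is what MUMPS expects for its sparse RHS */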
PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2978 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2979 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2980 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2981 } else { 2982 mumps->id.irhs_ptr = NULL; 2983 mumps->id.irhs_sparse = NULL; 2984 mumps->id.nz_rhs = 0; 2985 mumps->id.rhs_sparse = NULL; 2986 } 2987 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2988 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2989 2990 /* solve phase */ 2991 mumps->id.job = JOB_SOLVE; 2992 PetscMUMPS_c(mumps); 2993 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2994 2995 if (!mumps->myid) { 2996 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2997 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2998 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2999 } 3000 PetscFunctionReturn(PETSC_SUCCESS); 3001 } 3002 3003 /*@ 3004 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 3005 3006 Logically Collective 3007 3008 Input Parameter: 3009 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3010 3011 Output Parameter: 3012 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3013 3014 Level: beginner 3015 3016 References: 3017 . * - MUMPS Users' Guide 3018 3019 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3020 @*/ 3021 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3022 { 3023 PetscFunctionBegin; 3024 PetscValidType(F, 1); 3025 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3026 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3027 PetscFunctionReturn(PETSC_SUCCESS); 3028 } 3029 3030 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3031 { 3032 Mat spRHS; 3033 3034 PetscFunctionBegin; 3035 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3036 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3037 PetscCall(MatDestroy(&spRHS)); 3038 PetscFunctionReturn(PETSC_SUCCESS); 3039 } 3040 3041 /*@ 3042 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 3043 3044 Logically Collective 3045 3046 Input Parameter: 3047 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3048 3049 Output Parameter: 3050 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3051 3052 Level: beginner 3053 3054 References: 3055 . 
* - MUMPS Users' Guide 3056 3057 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3058 @*/ 3059 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3060 { 3061 PetscBool flg; 3062 3063 PetscFunctionBegin; 3064 PetscValidType(F, 1); 3065 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3066 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3067 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3068 3069 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3070 PetscFunctionReturn(PETSC_SUCCESS); 3071 } 3072 3073 /*@ 3074 MatMumpsGetInfo - Get MUMPS parameter INFO() 3075 3076 Logically Collective 3077 3078 Input Parameters: 3079 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3080 - icntl - index of MUMPS parameter array INFO() 3081 3082 Output Parameter: 3083 . ival - value of MUMPS INFO(icntl) 3084 3085 Level: beginner 3086 3087 References: 3088 . * - MUMPS Users' Guide 3089 3090 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3091 @*/ 3092 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3093 { 3094 PetscFunctionBegin; 3095 PetscValidType(F, 1); 3096 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3097 PetscAssertPointer(ival, 3); 3098 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3099 PetscFunctionReturn(PETSC_SUCCESS); 3100 } 3101 3102 /*@ 3103 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3104 3105 Logically Collective 3106 3107 Input Parameters: 3108 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3109 - icntl - index of MUMPS parameter array INFOG() 3110 3111 Output Parameter: 3112 . ival - value of MUMPS INFOG(icntl) 3113 3114 Level: beginner 3115 3116 References: 3117 . * - MUMPS Users' Guide 3118 3119 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3120 @*/ 3121 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3122 { 3123 PetscFunctionBegin; 3124 PetscValidType(F, 1); 3125 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3126 PetscAssertPointer(ival, 3); 3127 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3128 PetscFunctionReturn(PETSC_SUCCESS); 3129 } 3130 3131 /*@ 3132 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3133 3134 Logically Collective 3135 3136 Input Parameters: 3137 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3138 - icntl - index of MUMPS parameter array RINFO() 3139 3140 Output Parameter: 3141 . val - value of MUMPS RINFO(icntl) 3142 3143 Level: beginner 3144 3145 References: 3146 . 
* - MUMPS Users' Guide 3147 3148 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3149 @*/ 3150 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3151 { 3152 PetscFunctionBegin; 3153 PetscValidType(F, 1); 3154 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3155 PetscAssertPointer(val, 3); 3156 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3157 PetscFunctionReturn(PETSC_SUCCESS); 3158 } 3159 3160 /*@ 3161 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3162 3163 Logically Collective 3164 3165 Input Parameters: 3166 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3167 - icntl - index of MUMPS parameter array RINFOG() 3168 3169 Output Parameter: 3170 . val - value of MUMPS RINFOG(icntl) 3171 3172 Level: beginner 3173 3174 References: 3175 . * - MUMPS Users' Guide 3176 3177 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3178 @*/ 3179 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3180 { 3181 PetscFunctionBegin; 3182 PetscValidType(F, 1); 3183 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3184 PetscAssertPointer(val, 3); 3185 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3186 PetscFunctionReturn(PETSC_SUCCESS); 3187 } 3188 3189 /*@ 3190 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3191 3192 Logically Collective 3193 3194 Input Parameter: 3195 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3196 3197 Output Parameters: 3198 + size - local size of the array. The size of the array is non-zero only on the host. 3199 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3200 for freeing this array. 3201 3202 Level: beginner 3203 3204 References: 3205 . * - MUMPS Users' Guide 3206 3207 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3208 @*/ 3209 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3210 { 3211 PetscFunctionBegin; 3212 PetscValidType(F, 1); 3213 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3214 PetscAssertPointer(size, 2); 3215 PetscAssertPointer(array, 3); 3216 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3217 PetscFunctionReturn(PETSC_SUCCESS); 3218 } 3219 3220 /*MC 3221 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3222 distributed and sequential matrices via the external package MUMPS. 

  Works with `MATAIJ` and `MATSBAIJ` matrices

  Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

  Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.

  Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver

  Options Database Keys:
+ -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
. -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
. -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
. -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
. -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
. -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
                       Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
. -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
. -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
. -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
. -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
. -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
. -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
. -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
. -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
. -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
. -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
. -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
. -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
. -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
. -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix is present
. -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
. -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
. -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
. -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
. -mat_mumps_icntl_33 - ICNTL(33): compute determinant
. -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
. -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
. -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
. -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
. -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
. -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
. -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
. -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
. -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
. -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
- -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
                                   By default m is the number of cores per CPU package (socket) as reported by hwloc, the value suggested by the MUMPS manual.

  Level: beginner

  Notes:
  MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will
  error if the matrix is Hermitian.

  When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
  `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

  When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
  the failure with
.vb
          KSPGetPC(ksp,&pc);
          PCFactorGetMatrix(pc,&mat);
          MatMumpsGetInfo(mat,....);
          MatMumpsGetInfog(mat,....); etc.
.ve
  Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.

  MUMPS provides 64-bit integer support in two build modes:
  full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
  requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

  selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
  MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and
  columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
  integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

  Two modes to run MUMPS/PETSc with OpenMP
.vb
     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
.ve

.vb
     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
.ve

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part (i.e., PETSc part) of your code in
  the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` (or `--with-hwloc`), and have an MPI
  that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS libraries, to really get performance, you should
  have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS (PETSc will automatically try to utilize a threaded BLAS if
  --with-openmp is provided).

  If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
  processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
  size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
  are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We will free CPUs assigned to slaves (which might be set
  by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
  In a multi-socket compute node, MPI rank mapping is an issue. Still using the above example, suppose your compute node has two sockets: if you
  interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind MPI ranks to cores,
  then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half the cores in socket 0 and half the cores in
  socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the problem will not
  happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
  For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
  examine the mapping result.

  PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
  for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
  calls `omp_set_num_threads`(m) internally before calling MUMPS.

  References:
+ * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
- * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
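
  Example usage, as a minimal sketch of selecting this solver and adjusting one control parameter from code (assumes the `KSP` `ksp`, `Mat` `A`, and
  `Vec` `b`, `x` already exist; the ICNTL(14) value is illustrative, not a recommendation):
.vb
          PC  pc;
          Mat F;

          KSPSetOperators(ksp, A, A);
          KSPGetPC(ksp, &pc);
          PCSetType(pc, PCLU);
          PCFactorSetMatSolverType(pc, MATSOLVERMUMPS);
          PCFactorSetUpMatSolverType(pc); /* create the factor matrix so MUMPS options can be set on it */
          PCFactorGetMatrix(pc, &F);
          MatMumpsSetIcntl(F, 14, 30);    /* ICNTL(14): increase the estimated workspace by 30 percent */
          KSPSolve(ksp, b, x);
.ve
  The same effect is obtained on the command line with -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30.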
3340 3341 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3342 M*/ 3343 3344 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3345 { 3346 PetscFunctionBegin; 3347 *type = MATSOLVERMUMPS; 3348 PetscFunctionReturn(PETSC_SUCCESS); 3349 } 3350 3351 /* MatGetFactor for Seq and MPI AIJ matrices */ 3352 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3353 { 3354 Mat B; 3355 Mat_MUMPS *mumps; 3356 PetscBool isSeqAIJ, isDiag; 3357 PetscMPIInt size; 3358 3359 PetscFunctionBegin; 3360 #if defined(PETSC_USE_COMPLEX) 3361 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3362 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3363 *F = NULL; 3364 PetscFunctionReturn(PETSC_SUCCESS); 3365 } 3366 #endif 3367 /* Create the factorization matrix */ 3368 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3369 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3370 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3371 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3372 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3373 PetscCall(MatSetUp(B)); 3374 3375 PetscCall(PetscNew(&mumps)); 3376 3377 B->ops->view = MatView_MUMPS; 3378 B->ops->getinfo = MatGetInfo_MUMPS; 3379 3380 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3381 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3382 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3383 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3384 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3385 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3386 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3387 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3388 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3389 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3390 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3391 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3392 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3393 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3394 3395 if (ftype == MAT_FACTOR_LU) { 3396 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3397 B->factortype = MAT_FACTOR_LU; 3398 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3399 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 
3400 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3401 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3402 mumps->sym = 0; 3403 } else { 3404 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3405 B->factortype = MAT_FACTOR_CHOLESKY; 3406 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3407 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3408 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3409 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3410 #if defined(PETSC_USE_COMPLEX) 3411 mumps->sym = 2; 3412 #else 3413 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3414 else mumps->sym = 2; 3415 #endif 3416 } 3417 3418 /* set solvertype */ 3419 PetscCall(PetscFree(B->solvertype)); 3420 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3421 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3422 if (size == 1) { 3423 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3424 B->canuseordering = PETSC_TRUE; 3425 } 3426 B->ops->destroy = MatDestroy_MUMPS; 3427 B->data = (void *)mumps; 3428 3429 *F = B; 3430 mumps->id.job = JOB_NULL; 3431 mumps->ICNTL_pre = NULL; 3432 mumps->CNTL_pre = NULL; 3433 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3434 PetscFunctionReturn(PETSC_SUCCESS); 3435 } 3436 3437 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3438 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3439 { 3440 Mat B; 3441 Mat_MUMPS *mumps; 3442 PetscBool isSeqSBAIJ; 3443 PetscMPIInt size; 3444 3445 PetscFunctionBegin; 3446 #if defined(PETSC_USE_COMPLEX) 3447 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3448 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;
#else
  if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
  else mumps->sym = 2;
#endif

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqBAIJ;
  PetscMPIInt size;
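  /* Note: MUMPS Cholesky cannot be reached through BAIJ; this routine only supports LU, expanding BAIJ blocks into AIJ-style triples, and rejects Cholesky below in favor of SBAIJ or AIJ */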
3519 3520 PetscFunctionBegin; 3521 /* Create the factorization matrix */ 3522 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3523 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3524 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3525 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3526 PetscCall(MatSetUp(B)); 3527 3528 PetscCall(PetscNew(&mumps)); 3529 if (ftype == MAT_FACTOR_LU) { 3530 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3531 B->factortype = MAT_FACTOR_LU; 3532 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3533 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3534 mumps->sym = 0; 3535 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3536 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3537 3538 B->ops->view = MatView_MUMPS; 3539 B->ops->getinfo = MatGetInfo_MUMPS; 3540 3541 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3542 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3543 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3544 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3545 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3546 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3547 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3548 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3549 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3550 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3551 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3552 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3553 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3554 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3555 3556 /* set solvertype */ 3557 PetscCall(PetscFree(B->solvertype)); 3558 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3559 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3560 if (size == 1) { 3561 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3562 B->canuseordering = PETSC_TRUE; 3563 } 3564 B->ops->destroy = MatDestroy_MUMPS; 3565 B->data = (void *)mumps; 3566 3567 *F = B; 3568 mumps->id.job = JOB_NULL; 3569 mumps->ICNTL_pre = NULL; 3570 mumps->CNTL_pre = NULL; 3571 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3572 PetscFunctionReturn(PETSC_SUCCESS); 3573 } 3574 3575 /* MatGetFactor for Seq and MPI SELL matrices */ 3576 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3577 { 3578 Mat B; 3579 Mat_MUMPS *mumps; 3580 PetscBool isSeqSELL; 3581 PetscMPIInt size; 3582 
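  /* Note: only sequential SELL matrices with LU are supported here; other combinations error out below as "To be implemented" */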
3583 PetscFunctionBegin; 3584 /* Create the factorization matrix */ 3585 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3586 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3587 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3588 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3589 PetscCall(MatSetUp(B)); 3590 3591 PetscCall(PetscNew(&mumps)); 3592 3593 B->ops->view = MatView_MUMPS; 3594 B->ops->getinfo = MatGetInfo_MUMPS; 3595 3596 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3597 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3598 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3599 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3600 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3601 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3602 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3603 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3604 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3605 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3606 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3607 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3608 3609 if (ftype == MAT_FACTOR_LU) { 3610 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3611 B->factortype = MAT_FACTOR_LU; 3612 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3613 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3614 mumps->sym = 0; 3615 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3616 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3617 3618 /* set solvertype */ 3619 PetscCall(PetscFree(B->solvertype)); 3620 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3621 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3622 if (size == 1) { 3623 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3624 B->canuseordering = PETSC_TRUE; 3625 } 3626 B->ops->destroy = MatDestroy_MUMPS; 3627 B->data = (void *)mumps; 3628 3629 *F = B; 3630 mumps->id.job = JOB_NULL; 3631 mumps->ICNTL_pre = NULL; 3632 mumps->CNTL_pre = NULL; 3633 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3634 PetscFunctionReturn(PETSC_SUCCESS); 3635 } 3636 3637 /* MatGetFactor for MATNEST matrices */ 3638 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3639 { 3640 Mat B, **mats; 3641 Mat_MUMPS *mumps; 3642 PetscInt nr, nc; 3643 PetscMPIInt size; 3644 PetscBool flg = PETSC_TRUE; 3645 3646 PetscFunctionBegin; 3647 #if defined(PETSC_USE_COMPLEX) 3648 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3649 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not 

/* MatGetFactor for MATNEST matrices */
static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B, **mats;
  Mat_MUMPS  *mumps;
  PetscInt    nr, nc;
  PetscMPIInt size;
  PetscBool   flg = PETSC_TRUE;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif

  /* Return a NULL factor if the nest layout or a block type is not supported */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
    /* reject row index sets whose [min,max] ranges intersect: the blocks must not overlap */
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag;

      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
      if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
      else {
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
      }
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag) {
            PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag) {
          PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag) {
        PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym               = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
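
/*
   A hedged sketch of the MATNEST path above, using hypothetical assembled AIJ
   blocks A00, A01, A10, A11. Because this routine can legitimately leave the
   factor NULL when a block type or the nest layout is unsupported, a careful
   caller checks the result before factoring.

     Mat subs[] = {A00, A01, A10, A11};
     Mat N, F;

     MatCreateNest(PETSC_COMM_WORLD, 2, NULL, 2, NULL, subs, &N);
     MatGetFactor(N, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
     if (F) {
       // proceed with MatLUFactorSymbolic()/MatLUFactorNumeric()/MatSolve()
       // exactly as in the BAIJ sketch earlier in this file
     }

   For MAT_FACTOR_CHOLESKY the checks above additionally require a square nest
   with matching, non-intersecting row and column index sets.
*/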

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
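
/*
   MatSolverTypeRegister_MUMPS() is invoked by PETSc's solver-package
   registration when PETSc is configured with MUMPS, so applications normally
   never call it directly and select MUMPS purely through the options database.
   A hedged sketch, assuming a linear system with assembled matrix A and
   vectors b, x:

     KSP ksp;

     KSPCreate(PETSC_COMM_WORLD, &ksp);
     KSPSetOperators(ksp, A, A);
     KSPSetFromOptions(ksp);   // picks up the options below
     KSPSolve(ksp, b, x);
     KSPDestroy(&ksp);

   run as, e.g. (the executable name is hypothetical):
     ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_4 2

   where -mat_mumps_icntl_4 2 raises MUMPS's diagnostic verbosity (ICNTL(4)).
*/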