1 /* 2 Provides an interface to the MUMPS sparse solver 3 */ 4 #include <petscpkg_version.h> 5 #include <petscsf.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 11 12 EXTERN_C_BEGIN 13 #if defined(PETSC_USE_COMPLEX) 14 #if defined(PETSC_USE_REAL_SINGLE) 15 #include <cmumps_c.h> 16 #else 17 #include <zmumps_c.h> 18 #endif 19 #else 20 #if defined(PETSC_USE_REAL_SINGLE) 21 #include <smumps_c.h> 22 #else 23 #include <dmumps_c.h> 24 #endif 25 #endif 26 EXTERN_C_END 27 #define JOB_INIT -1 28 #define JOB_NULL 0 29 #define JOB_FACTSYMBOLIC 1 30 #define JOB_FACTNUMERIC 2 31 #define JOB_SOLVE 3 32 #define JOB_END -2 33 34 /* calls to MUMPS */ 35 #if defined(PETSC_USE_COMPLEX) 36 #if defined(PETSC_USE_REAL_SINGLE) 37 #define MUMPS_c cmumps_c 38 #else 39 #define MUMPS_c zmumps_c 40 #endif 41 #else 42 #if defined(PETSC_USE_REAL_SINGLE) 43 #define MUMPS_c smumps_c 44 #else 45 #define MUMPS_c dmumps_c 46 #endif 47 #endif 48 49 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 50 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 51 naming convention in PetscMPIInt, PetscBLASInt etc. 52 */ 53 typedef MUMPS_INT PetscMUMPSInt; 54 55 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 56 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 57 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 58 #endif 59 #else 60 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 61 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 62 #endif 63 #endif 64 65 #define MPIU_MUMPSINT MPI_INT 66 #define PETSC_MUMPS_INT_MAX 2147483647 67 #define PETSC_MUMPS_INT_MIN -2147483648 68 69 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 70 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 71 { 72 PetscFunctionBegin; 73 #if PetscDefined(USE_64BIT_INDICES) 74 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 75 #endif 76 *b = (PetscMUMPSInt)(a); 77 PetscFunctionReturn(PETSC_SUCCESS); 78 } 79 80 /* Put these utility routines here since they are only used in this file */ 81 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 82 { 83 PetscInt myval; 84 PetscBool myset; 85 PetscFunctionBegin; 86 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 87 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 88 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 89 if (set) *set = myset; 90 PetscFunctionReturn(PETSC_SUCCESS); 91 } 92 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 93 94 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 95 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 96 #define PetscMUMPS_c(mumps) \ 97 do { \ 98 if (mumps->use_petsc_omp_support) { \ 99 if (mumps->is_omp_master) { \ 100 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 101 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 102 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 103 PetscCall(PetscFPTrapPop()); \ 104 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 105 } \ 106 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 107 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 108 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 109 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 110 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 111 */ \ 112 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 113 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 114 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 115 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 116 } else { \ 117 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 118 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 119 PetscCall(PetscFPTrapPop()); \ 120 } \ 121 } while (0) 122 #else 123 #define PetscMUMPS_c(mumps) \ 124 do { \ 125 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 126 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 127 PetscCall(PetscFPTrapPop()); \ 128 } while (0) 129 #endif 130 131 /* declare MumpsScalar */ 132 #if defined(PETSC_USE_COMPLEX) 133 #if defined(PETSC_USE_REAL_SINGLE) 134 #define MumpsScalar mumps_complex 135 #else 136 #define MumpsScalar mumps_double_complex 137 #endif 138 #else 139 #define MumpsScalar PetscScalar 140 #endif 141 142 /* macros s.t. indices match MUMPS documentation */ 143 #define ICNTL(I) icntl[(I)-1] 144 #define CNTL(I) cntl[(I)-1] 145 #define INFOG(I) infog[(I)-1] 146 #define INFO(I) info[(I)-1] 147 #define RINFOG(I) rinfog[(I)-1] 148 #define RINFO(I) rinfo[(I)-1] 149 150 typedef struct Mat_MUMPS Mat_MUMPS; 151 struct Mat_MUMPS { 152 #if defined(PETSC_USE_COMPLEX) 153 #if defined(PETSC_USE_REAL_SINGLE) 154 CMUMPS_STRUC_C id; 155 #else 156 ZMUMPS_STRUC_C id; 157 #endif 158 #else 159 #if defined(PETSC_USE_REAL_SINGLE) 160 SMUMPS_STRUC_C id; 161 #else 162 DMUMPS_STRUC_C id; 163 #endif 164 #endif 165 166 MatStructure matstruc; 167 PetscMPIInt myid, petsc_size; 168 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 169 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 170 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 171 PetscMUMPSInt sym; 172 MPI_Comm mumps_comm; 173 PetscMUMPSInt *ICNTL_pre; 174 PetscReal *CNTL_pre; 175 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 176 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 177 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 178 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 179 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 180 PetscInt *rhs_nrow, max_nrhs; 181 PetscMPIInt *rhs_recvcounts, *rhs_disps; 182 PetscScalar *rhs_loc, *rhs_recvbuf; 183 #endif 184 Vec b_seq, x_seq; 185 PetscInt ninfo, *info; /* which INFO to display */ 186 PetscInt sizeredrhs; 187 PetscScalar *schur_sol; 188 PetscInt schur_sizesol; 189 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 190 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 191 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 192 193 /* Support for MATNEST */ 194 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 195 PetscInt64 *nest_vals_start; 196 PetscScalar *nest_vals; 197 198 /* stuff used by petsc/mumps OpenMP support*/ 199 PetscBool use_petsc_omp_support; 200 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 201 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 202 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 203 PetscMPIInt tag, omp_comm_size; 204 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 205 MPI_Request *reqs; 206 }; 207 208 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 209 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 210 */ 211 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 212 { 213 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 214 215 PetscFunctionBegin; 216 #if defined(PETSC_USE_64BIT_INDICES) 217 { 218 PetscInt i; 219 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 220 PetscCall(PetscFree(mumps->ia_alloc)); 221 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 222 mumps->cur_ilen = nrow + 1; 223 } 224 if (nnz > mumps->cur_jlen) { 225 PetscCall(PetscFree(mumps->ja_alloc)); 226 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 227 mumps->cur_jlen = nnz; 228 } 229 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 230 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 231 *ia_mumps = mumps->ia_alloc; 232 *ja_mumps = mumps->ja_alloc; 233 } 234 #else 235 *ia_mumps = ia; 236 *ja_mumps = ja; 237 #endif 238 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 243 { 244 PetscFunctionBegin; 245 PetscCall(PetscFree(mumps->id.listvar_schur)); 246 PetscCall(PetscFree(mumps->id.redrhs)); 247 PetscCall(PetscFree(mumps->schur_sol)); 248 mumps->id.size_schur = 0; 249 mumps->id.schur_lld = 0; 250 mumps->id.ICNTL(19) = 0; 251 PetscFunctionReturn(PETSC_SUCCESS); 252 } 253 254 /* solve with rhs in mumps->id.redrhs and return in the same location */ 255 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 256 { 257 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 258 Mat S, B, X; 259 MatFactorSchurStatus schurstatus; 260 PetscInt sizesol; 261 262 PetscFunctionBegin; 263 PetscCall(MatFactorFactorizeSchurComplement(F)); 264 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 265 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 266 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 267 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 268 PetscCall(MatBindToCPU(B, S->boundtocpu)); 269 #endif 270 switch (schurstatus) { 271 case MAT_FACTOR_SCHUR_FACTORED: 272 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 273 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 274 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 275 PetscCall(MatBindToCPU(X, S->boundtocpu)); 276 #endif 277 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 278 PetscCall(MatMatSolveTranspose(S, B, X)); 279 } else { 280 PetscCall(MatMatSolve(S, B, X)); 281 } 282 break; 283 case MAT_FACTOR_SCHUR_INVERTED: 284 sizesol = mumps->id.nrhs * mumps->id.size_schur; 285 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 286 PetscCall(PetscFree(mumps->schur_sol)); 287 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 288 mumps->schur_sizesol = sizesol; 289 } 290 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 291 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 292 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 293 PetscCall(MatBindToCPU(X, S->boundtocpu)); 294 #endif 295 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 296 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 297 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 298 } else { 299 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 300 } 301 PetscCall(MatProductSetFromOptions(X)); 302 PetscCall(MatProductSymbolic(X)); 303 PetscCall(MatProductNumeric(X)); 304 305 PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 306 break; 307 default: 308 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 309 } 310 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 311 PetscCall(MatDestroy(&B)); 312 PetscCall(MatDestroy(&X)); 313 PetscFunctionReturn(PETSC_SUCCESS); 314 } 315 316 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 317 { 318 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 319 320 PetscFunctionBegin; 321 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 if (!expansion) { /* prepare for the condensation step */ 325 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 326 /* allocate MUMPS internal array to store reduced right-hand sides */ 327 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 328 PetscCall(PetscFree(mumps->id.redrhs)); 329 mumps->id.lredrhs = mumps->id.size_schur; 330 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 331 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 332 } 333 } else { /* prepare for the expansion step */ 334 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 335 PetscCall(MatMumpsSolveSchur_Private(F)); 336 mumps->id.ICNTL(26) = 2; /* expansion phase */ 337 PetscMUMPS_c(mumps); 338 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 339 /* restore defaults */ 340 mumps->id.ICNTL(26) = -1; 341 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 342 if (mumps->id.nrhs > 1) { 343 PetscCall(PetscFree(mumps->id.redrhs)); 344 mumps->id.lredrhs = 0; 345 mumps->sizeredrhs = 0; 346 } 347 } 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 /* 352 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 353 354 input: 355 A - matrix in aij,baij or sbaij format 356 shift - 0: C style output triple; 1: Fortran style output triple. 357 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 358 MAT_REUSE_MATRIX: only the values in v array are updated 359 output: 360 nnz - dim of r, c, and v (number of local nonzero entries of A) 361 r, c, v - row and col index, matrix values (matrix triples) 362 363 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 364 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 365 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 366 367 */ 368 369 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 370 { 371 const PetscScalar *av; 372 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 373 PetscInt64 nz, rnz, i, j, k; 374 PetscMUMPSInt *row, *col; 375 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 376 377 PetscFunctionBegin; 378 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 379 if (reuse == MAT_INITIAL_MATRIX) { 380 nz = aa->nz; 381 ai = aa->i; 382 aj = aa->j; 383 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 384 for (i = k = 0; i < M; i++) { 385 rnz = ai[i + 1] - ai[i]; 386 ajj = aj + ai[i]; 387 for (j = 0; j < rnz; j++) { 388 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 389 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 390 k++; 391 } 392 } 393 mumps->val = (PetscScalar *)av; 394 mumps->irn = row; 395 mumps->jcn = col; 396 mumps->nnz = nz; 397 } else PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); 398 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 399 PetscFunctionReturn(PETSC_SUCCESS); 400 } 401 402 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 403 { 404 PetscInt64 nz, i, j, k, r; 405 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 406 PetscMUMPSInt *row, *col; 407 408 PetscFunctionBegin; 409 nz = a->sliidx[a->totalslices]; 410 if (reuse == MAT_INITIAL_MATRIX) { 411 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 412 for (i = k = 0; i < a->totalslices; i++) { 413 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 414 } 415 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 416 mumps->irn = row; 417 mumps->jcn = col; 418 mumps->nnz = nz; 419 mumps->val = a->val; 420 } else PetscCall(PetscArraycpy(mumps->val, a->val, nz)); 421 PetscFunctionReturn(PETSC_SUCCESS); 422 } 423 424 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 425 { 426 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 427 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 428 PetscInt64 M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 429 PetscInt bs; 430 PetscMUMPSInt *row, *col; 431 432 PetscFunctionBegin; 433 if (reuse == MAT_INITIAL_MATRIX) { 434 PetscCall(MatGetBlockSize(A, &bs)); 435 M = A->rmap->N / bs; 436 ai = aa->i; 437 aj = aa->j; 438 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 439 for (i = 0; i < M; i++) { 440 ajj = aj + ai[i]; 441 rnz = ai[i + 1] - ai[i]; 442 for (k = 0; k < rnz; k++) { 443 for (j = 0; j < bs; j++) { 444 for (m = 0; m < bs; m++) { 445 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 446 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 447 idx++; 448 } 449 } 450 } 451 } 452 mumps->irn = row; 453 mumps->jcn = col; 454 mumps->nnz = nz; 455 mumps->val = aa->a; 456 } else PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); 457 PetscFunctionReturn(PETSC_SUCCESS); 458 } 459 460 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 461 { 462 const PetscInt *ai, *aj, *ajj; 463 PetscInt bs; 464 PetscInt64 nz, rnz, i, j, k, m; 465 PetscMUMPSInt *row, *col; 466 PetscScalar *val; 467 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 468 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 469 #if defined(PETSC_USE_COMPLEX) 470 PetscBool isset, hermitian; 471 #endif 472 473 PetscFunctionBegin; 474 #if defined(PETSC_USE_COMPLEX) 475 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 476 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 477 #endif 478 ai = aa->i; 479 aj = aa->j; 480 PetscCall(MatGetBlockSize(A, &bs)); 481 if (reuse == MAT_INITIAL_MATRIX) { 482 const PetscInt64 alloc_size = aa->nz * bs2; 483 484 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 485 if (bs > 1) { 486 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 487 mumps->val = mumps->val_alloc; 488 } else { 489 mumps->val = aa->a; 490 } 491 mumps->irn = row; 492 mumps->jcn = col; 493 } else { 494 row = mumps->irn; 495 col = mumps->jcn; 496 } 497 val = mumps->val; 498 499 nz = 0; 500 if (bs > 1) { 501 for (i = 0; i < mbs; i++) { 502 rnz = ai[i + 1] - ai[i]; 503 ajj = aj + ai[i]; 504 for (j = 0; j < rnz; j++) { 505 for (k = 0; k < bs; k++) { 506 for (m = 0; m < bs; m++) { 507 if (ajj[j] > i || k >= m) { 508 if (reuse == MAT_INITIAL_MATRIX) { 509 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 510 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 511 } 512 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 513 } 514 } 515 } 516 } 517 } 518 } else if (reuse == MAT_INITIAL_MATRIX) { 519 for (i = 0; i < mbs; i++) { 520 rnz = ai[i + 1] - ai[i]; 521 ajj = aj + ai[i]; 522 for (j = 0; j < rnz; j++) { 523 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 524 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 525 nz++; 526 } 527 } 528 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 529 } else PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX */ 530 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 531 PetscFunctionReturn(PETSC_SUCCESS); 532 } 533 534 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 535 { 536 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 537 PetscInt64 nz, rnz, i, j; 538 const PetscScalar *av, *v1; 539 PetscScalar *val; 540 PetscMUMPSInt *row, *col; 541 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 542 PetscBool missing; 543 #if defined(PETSC_USE_COMPLEX) 544 PetscBool hermitian, isset; 545 #endif 546 547 PetscFunctionBegin; 548 #if defined(PETSC_USE_COMPLEX) 549 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 550 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 551 #endif 552 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 553 ai = aa->i; 554 aj = aa->j; 555 adiag = aa->diag; 556 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 557 if (reuse == MAT_INITIAL_MATRIX) { 558 /* count nz in the upper triangular part of A */ 559 nz = 0; 560 if (missing) { 561 for (i = 0; i < M; i++) { 562 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 563 for (j = ai[i]; j < ai[i + 1]; j++) { 564 if (aj[j] < i) continue; 565 nz++; 566 } 567 } else { 568 nz += ai[i + 1] - adiag[i]; 569 } 570 } 571 } else { 572 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 573 } 574 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 575 PetscCall(PetscMalloc1(nz, &val)); 576 mumps->nnz = nz; 577 mumps->irn = row; 578 mumps->jcn = col; 579 mumps->val = mumps->val_alloc = val; 580 581 nz = 0; 582 if (missing) { 583 for (i = 0; i < M; i++) { 584 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 585 for (j = ai[i]; j < ai[i + 1]; j++) { 586 if (aj[j] < i) continue; 587 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 588 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 589 val[nz] = av[j]; 590 nz++; 591 } 592 } else { 593 rnz = ai[i + 1] - adiag[i]; 594 ajj = aj + adiag[i]; 595 v1 = av + adiag[i]; 596 for (j = 0; j < rnz; j++) { 597 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 598 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 599 val[nz++] = v1[j]; 600 } 601 } 602 } 603 } else { 604 for (i = 0; i < M; i++) { 605 rnz = ai[i + 1] - adiag[i]; 606 ajj = aj + adiag[i]; 607 v1 = av + adiag[i]; 608 for (j = 0; j < rnz; j++) { 609 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 610 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 611 val[nz++] = v1[j]; 612 } 613 } 614 } 615 } else { 616 nz = 0; 617 val = mumps->val; 618 if (missing) { 619 for (i = 0; i < M; i++) { 620 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 621 for (j = ai[i]; j < ai[i + 1]; j++) { 622 if (aj[j] < i) continue; 623 val[nz++] = av[j]; 624 } 625 } else { 626 rnz = ai[i + 1] - adiag[i]; 627 v1 = av + adiag[i]; 628 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 629 } 630 } 631 } else { 632 for (i = 0; i < M; i++) { 633 rnz = ai[i + 1] - adiag[i]; 634 v1 = av + adiag[i]; 635 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 636 } 637 } 638 } 639 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 640 PetscFunctionReturn(PETSC_SUCCESS); 641 } 642 643 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 644 { 645 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 646 PetscInt bs; 647 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 648 PetscMUMPSInt *row, *col; 649 const PetscScalar *av, *bv, *v1, *v2; 650 PetscScalar *val; 651 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 652 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 653 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 654 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 655 #if defined(PETSC_USE_COMPLEX) 656 PetscBool hermitian, isset; 657 #endif 658 659 PetscFunctionBegin; 660 #if defined(PETSC_USE_COMPLEX) 661 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 662 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 663 #endif 664 PetscCall(MatGetBlockSize(A, &bs)); 665 rstart = A->rmap->rstart; 666 ai = aa->i; 667 aj = aa->j; 668 bi = bb->i; 669 bj = bb->j; 670 av = aa->a; 671 bv = bb->a; 672 673 garray = mat->garray; 674 675 if (reuse == MAT_INITIAL_MATRIX) { 676 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 677 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 678 PetscCall(PetscMalloc1(nz, &val)); 679 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 680 mumps->irn = row; 681 mumps->jcn = col; 682 mumps->val = mumps->val_alloc = val; 683 } else { 684 val = mumps->val; 685 } 686 687 jj = 0; 688 irow = rstart; 689 for (i = 0; i < mbs; i++) { 690 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 691 countA = ai[i + 1] - ai[i]; 692 countB = bi[i + 1] - bi[i]; 693 bjj = bj + bi[i]; 694 v1 = av + ai[i] * bs2; 695 v2 = bv + bi[i] * bs2; 696 697 if (bs > 1) { 698 /* A-part */ 699 for (j = 0; j < countA; j++) { 700 for (k = 0; k < bs; k++) { 701 for (m = 0; m < bs; m++) { 702 if (rstart + ajj[j] * bs > irow || k >= m) { 703 if (reuse == MAT_INITIAL_MATRIX) { 704 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 705 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 706 } 707 val[jj++] = v1[j * bs2 + m + k * bs]; 708 } 709 } 710 } 711 } 712 713 /* B-part */ 714 for (j = 0; j < countB; j++) { 715 for (k = 0; k < bs; k++) { 716 for (m = 0; m < bs; m++) { 717 if (reuse == MAT_INITIAL_MATRIX) { 718 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 719 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 720 } 721 val[jj++] = v2[j * bs2 + m + k * bs]; 722 } 723 } 724 } 725 } else { 726 /* A-part */ 727 for (j = 0; j < countA; j++) { 728 if (reuse == MAT_INITIAL_MATRIX) { 729 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 730 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 731 } 732 val[jj++] = v1[j]; 733 } 734 735 /* B-part */ 736 for (j = 0; j < countB; j++) { 737 if (reuse == MAT_INITIAL_MATRIX) { 738 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 739 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 740 } 741 val[jj++] = v2[j]; 742 } 743 } 744 irow += bs; 745 } 746 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 747 PetscFunctionReturn(PETSC_SUCCESS); 748 } 749 750 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 751 { 752 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 753 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 754 PetscMUMPSInt *row, *col; 755 const PetscScalar *av, *bv, *v1, *v2; 756 PetscScalar *val; 757 Mat Ad, Ao; 758 Mat_SeqAIJ *aa; 759 Mat_SeqAIJ *bb; 760 761 PetscFunctionBegin; 762 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 763 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 764 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 765 766 aa = (Mat_SeqAIJ *)(Ad)->data; 767 bb = (Mat_SeqAIJ *)(Ao)->data; 768 ai = aa->i; 769 aj = aa->j; 770 bi = bb->i; 771 bj = bb->j; 772 773 rstart = A->rmap->rstart; 774 cstart = A->cmap->rstart; 775 776 if (reuse == MAT_INITIAL_MATRIX) { 777 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 778 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 779 PetscCall(PetscMalloc1(nz, &val)); 780 mumps->nnz = nz; 781 mumps->irn = row; 782 mumps->jcn = col; 783 mumps->val = mumps->val_alloc = val; 784 } else { 785 val = mumps->val; 786 } 787 788 jj = 0; 789 irow = rstart; 790 for (i = 0; i < m; i++) { 791 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 792 countA = ai[i + 1] - ai[i]; 793 countB = bi[i + 1] - bi[i]; 794 bjj = bj + bi[i]; 795 v1 = av + ai[i]; 796 v2 = bv + bi[i]; 797 798 /* A-part */ 799 for (j = 0; j < countA; j++) { 800 if (reuse == MAT_INITIAL_MATRIX) { 801 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 802 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 803 } 804 val[jj++] = v1[j]; 805 } 806 807 /* B-part */ 808 for (j = 0; j < countB; j++) { 809 if (reuse == MAT_INITIAL_MATRIX) { 810 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 811 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 812 } 813 val[jj++] = v2[j]; 814 } 815 irow++; 816 } 817 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 818 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 819 PetscFunctionReturn(PETSC_SUCCESS); 820 } 821 822 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 823 { 824 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 825 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 826 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 827 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 828 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 829 const PetscInt bs2 = mat->bs2; 830 PetscInt bs; 831 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 832 PetscMUMPSInt *row, *col; 833 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 834 PetscScalar *val; 835 836 PetscFunctionBegin; 837 PetscCall(MatGetBlockSize(A, &bs)); 838 if (reuse == MAT_INITIAL_MATRIX) { 839 nz = bs2 * (aa->nz + bb->nz); 840 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 841 PetscCall(PetscMalloc1(nz, &val)); 842 mumps->nnz = nz; 843 mumps->irn = row; 844 mumps->jcn = col; 845 mumps->val = mumps->val_alloc = val; 846 } else { 847 val = mumps->val; 848 } 849 850 jj = 0; 851 irow = rstart; 852 for (i = 0; i < mbs; i++) { 853 countA = ai[i + 1] - ai[i]; 854 countB = bi[i + 1] - bi[i]; 855 ajj = aj + ai[i]; 856 bjj = bj + bi[i]; 857 v1 = av + bs2 * ai[i]; 858 v2 = bv + bs2 * bi[i]; 859 860 idx = 0; 861 /* A-part */ 862 for (k = 0; k < countA; k++) { 863 for (j = 0; j < bs; j++) { 864 for (n = 0; n < bs; n++) { 865 if (reuse == MAT_INITIAL_MATRIX) { 866 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 867 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 868 } 869 val[jj++] = v1[idx++]; 870 } 871 } 872 } 873 874 idx = 0; 875 /* B-part */ 876 for (k = 0; k < countB; k++) { 877 for (j = 0; j < bs; j++) { 878 for (n = 0; n < bs; n++) { 879 if (reuse == MAT_INITIAL_MATRIX) { 880 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 881 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 882 } 883 val[jj++] = v2[idx++]; 884 } 885 } 886 } 887 irow += bs; 888 } 889 PetscFunctionReturn(PETSC_SUCCESS); 890 } 891 892 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 893 { 894 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 895 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 896 PetscMUMPSInt *row, *col; 897 const PetscScalar *av, *bv, *v1, *v2; 898 PetscScalar *val; 899 Mat Ad, Ao; 900 Mat_SeqAIJ *aa; 901 Mat_SeqAIJ *bb; 902 #if defined(PETSC_USE_COMPLEX) 903 PetscBool hermitian, isset; 904 #endif 905 906 PetscFunctionBegin; 907 #if defined(PETSC_USE_COMPLEX) 908 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 909 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 910 #endif 911 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 912 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 913 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 914 915 aa = (Mat_SeqAIJ *)(Ad)->data; 916 bb = (Mat_SeqAIJ *)(Ao)->data; 917 ai = aa->i; 918 aj = aa->j; 919 adiag = aa->diag; 920 bi = bb->i; 921 bj = bb->j; 922 923 rstart = A->rmap->rstart; 924 925 if (reuse == MAT_INITIAL_MATRIX) { 926 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 927 nzb = 0; /* num of upper triangular entries in mat->B */ 928 for (i = 0; i < m; i++) { 929 nza += (ai[i + 1] - adiag[i]); 930 countB = bi[i + 1] - bi[i]; 931 bjj = bj + bi[i]; 932 for (j = 0; j < countB; j++) { 933 if (garray[bjj[j]] > rstart) nzb++; 934 } 935 } 936 937 nz = nza + nzb; /* total nz of upper triangular part of mat */ 938 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 939 PetscCall(PetscMalloc1(nz, &val)); 940 mumps->nnz = nz; 941 mumps->irn = row; 942 mumps->jcn = col; 943 mumps->val = mumps->val_alloc = val; 944 } else { 945 val = mumps->val; 946 } 947 948 jj = 0; 949 irow = rstart; 950 for (i = 0; i < m; i++) { 951 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 952 v1 = av + adiag[i]; 953 countA = ai[i + 1] - adiag[i]; 954 countB = bi[i + 1] - bi[i]; 955 bjj = bj + bi[i]; 956 v2 = bv + bi[i]; 957 958 /* A-part */ 959 for (j = 0; j < countA; j++) { 960 if (reuse == MAT_INITIAL_MATRIX) { 961 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 962 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 963 } 964 val[jj++] = v1[j]; 965 } 966 967 /* B-part */ 968 for (j = 0; j < countB; j++) { 969 if (garray[bjj[j]] > rstart) { 970 if (reuse == MAT_INITIAL_MATRIX) { 971 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 972 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 973 } 974 val[jj++] = v2[j]; 975 } 976 } 977 irow++; 978 } 979 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 980 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 981 PetscFunctionReturn(PETSC_SUCCESS); 982 } 983 984 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 985 { 986 const PetscScalar *av; 987 const PetscInt M = A->rmap->n; 988 PetscInt64 i; 989 PetscMUMPSInt *row, *col; 990 Vec v; 991 992 PetscFunctionBegin; 993 PetscCall(MatDiagonalGetDiagonal(A, &v)); 994 PetscCall(VecGetArrayRead(v, &av)); 995 if (reuse == MAT_INITIAL_MATRIX) { 996 PetscCall(PetscMalloc2(M, &row, M, &col)); 997 for (i = 0; i < M; i++) { 998 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 999 col[i] = row[i]; 1000 } 1001 mumps->val = (PetscScalar *)av; 1002 mumps->irn = row; 1003 mumps->jcn = col; 1004 mumps->nnz = M; 1005 } else PetscCall(PetscArraycpy(mumps->val, av, M)); 1006 PetscCall(VecRestoreArrayRead(v, &av)); 1007 PetscFunctionReturn(PETSC_SUCCESS); 1008 } 1009 1010 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1011 { 1012 Mat **mats; 1013 PetscInt nr, nc; 1014 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 1015 1016 PetscFunctionBegin; 1017 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1018 if (reuse == MAT_INITIAL_MATRIX) { 1019 PetscMUMPSInt *irns, *jcns; 1020 PetscScalar *vals; 1021 PetscInt64 totnnz, cumnnz, maxnnz; 1022 PetscInt *pjcns_w; 1023 IS *rows, *cols; 1024 PetscInt **rows_idx, **cols_idx; 1025 1026 cumnnz = 0; 1027 maxnnz = 0; 1028 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1029 for (PetscInt r = 0; r < nr; r++) { 1030 for (PetscInt c = 0; c < nc; c++) { 1031 Mat sub = mats[r][c]; 1032 1033 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1034 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1035 if (sub) { 1036 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1037 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag; 1038 MatInfo info; 1039 1040 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1041 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1042 else { 1043 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1044 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1045 } 1046 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1047 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1048 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1049 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1050 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1051 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1052 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1053 1054 if (chol) { 1055 if (r == c) { 1056 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1057 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1058 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1059 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1060 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1061 } else { 1062 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1063 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1064 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1065 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1066 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1067 } 1068 } else { 1069 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1070 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1071 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1072 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1073 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1074 } 1075 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1076 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1077 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1078 cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 1079 maxnnz = PetscMax(maxnnz, info.nz_used); 1080 } 1081 } 1082 } 1083 1084 /* Allocate total COO */ 1085 totnnz = cumnnz; 1086 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1087 PetscCall(PetscMalloc1(totnnz, &vals)); 1088 1089 /* Handle rows and column maps 1090 We directly map rows and use an SF for the columns */ 1091 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1092 PetscCall(MatNestGetISs(A, rows, cols)); 1093 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1094 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1095 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1096 else (void)maxnnz; 1097 1098 cumnnz = 0; 1099 for (PetscInt r = 0; r < nr; r++) { 1100 for (PetscInt c = 0; c < nc; c++) { 1101 Mat sub = mats[r][c]; 1102 const PetscInt *ridx = rows_idx[r]; 1103 const PetscInt *cidx = cols_idx[c]; 1104 PetscInt rst; 1105 PetscSF csf; 1106 PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 1107 PetscLayout cmap; 1108 1109 mumps->nest_vals_start[r * nc + c] = cumnnz; 1110 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1111 1112 /* Extract inner blocks if needed */ 1113 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1114 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1115 else { 1116 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1117 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1118 } 1119 swap = (PetscBool)(isTrans || isHTrans); 1120 1121 /* Get column layout to map off-process columns */ 1122 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1123 1124 /* Get row start to map on-process rows */ 1125 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1126 1127 /* Directly use the mumps datastructure and use C ordering for now */ 1128 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1129 1130 /* Swap the role of rows and columns indices for transposed blocks 1131 since we need values with global final ordering */ 1132 if (swap) { 1133 cidx = rows_idx[r]; 1134 ridx = cols_idx[c]; 1135 } 1136 1137 /* Communicate column indices 1138 This could have been done with a single SF but it would have complicated the code a lot. 1139 But since we do it only once, we pay the price of setting up an SF for each block */ 1140 if (PetscDefined(USE_64BIT_INDICES)) { 1141 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1142 } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 1143 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1144 PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1145 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1146 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1147 PetscCall(PetscSFDestroy(&csf)); 1148 1149 /* Import indices: use direct map for rows and mapped indices for columns */ 1150 if (swap) { 1151 for (PetscInt k = 0; k < mumps->nnz; k++) { 1152 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1153 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1154 } 1155 } else { 1156 for (PetscInt k = 0; k < mumps->nnz; k++) { 1157 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1158 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1159 } 1160 } 1161 1162 /* Import values to full COO */ 1163 PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1164 if (isHTrans) { /* conjugate the entries */ 1165 PetscScalar *v = vals + cumnnz; 1166 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]); 1167 } 1168 1169 /* Shift new starting point and sanity check */ 1170 cumnnz += mumps->nnz; 1171 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1172 1173 /* Free scratch memory */ 1174 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1175 PetscCall(PetscFree(mumps->val_alloc)); 1176 mumps->val = NULL; 1177 mumps->nnz = 0; 1178 } 1179 } 1180 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1181 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1182 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1183 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1184 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1185 mumps->nest_vals_start[nr * nc] = cumnnz; 1186 1187 /* Set pointers for final MUMPS data structure */ 1188 mumps->nest_vals = vals; 1189 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1190 mumps->val = vals; 1191 mumps->irn = irns; 1192 mumps->jcn = jcns; 1193 mumps->nnz = cumnnz; 1194 } else { 1195 PetscScalar *oval = mumps->nest_vals; 1196 for (PetscInt r = 0; r < nr; r++) { 1197 for (PetscInt c = 0; c < nc; c++) { 1198 PetscBool isTrans, isHTrans = PETSC_FALSE; 1199 Mat sub = mats[r][c]; 1200 PetscInt midx = r * nc + c; 1201 1202 if (!mumps->nest_convert_to_triples[midx]) continue; 1203 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1204 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1205 else { 1206 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1207 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1208 } 1209 mumps->val = oval + mumps->nest_vals_start[midx]; 1210 PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1211 if (isHTrans) { 1212 PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1213 for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1214 } 1215 } 1216 } 1217 mumps->val = oval; 1218 } 1219 PetscFunctionReturn(PETSC_SUCCESS); 1220 } 1221 1222 static PetscErrorCode MatDestroy_MUMPS(Mat A) 1223 { 1224 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1225 1226 PetscFunctionBegin; 1227 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1228 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1229 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1230 PetscCall(VecDestroy(&mumps->b_seq)); 1231 PetscCall(VecDestroy(&mumps->x_seq)); 1232 PetscCall(PetscFree(mumps->id.perm_in)); 1233 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1234 PetscCall(PetscFree(mumps->val_alloc)); 1235 PetscCall(PetscFree(mumps->info)); 1236 PetscCall(PetscFree(mumps->ICNTL_pre)); 1237 PetscCall(PetscFree(mumps->CNTL_pre)); 1238 PetscCall(MatMumpsResetSchur_Private(mumps)); 1239 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1240 mumps->id.job = JOB_END; 1241 PetscMUMPS_c(mumps); 1242 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1243 if (mumps->mumps_comm != MPI_COMM_NULL) { 1244 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1245 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1246 } 1247 } 1248 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1249 if (mumps->use_petsc_omp_support) { 1250 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1251 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1252 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1253 } 1254 #endif 1255 PetscCall(PetscFree(mumps->ia_alloc)); 1256 PetscCall(PetscFree(mumps->ja_alloc)); 1257 PetscCall(PetscFree(mumps->recvcount)); 1258 PetscCall(PetscFree(mumps->reqs)); 1259 PetscCall(PetscFree(mumps->irhs_loc)); 1260 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1261 PetscCall(PetscFree(mumps->nest_vals)); 1262 PetscCall(PetscFree(A->data)); 1263 1264 /* clear composed functions */ 1265 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1266 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1267 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1268 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1269 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1270 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1271 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1272 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1273 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1274 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1275 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1276 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1277 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1278 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1279 PetscFunctionReturn(PETSC_SUCCESS); 1280 } 1281 1282 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1283 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1284 { 1285 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1286 const PetscMPIInt ompsize = mumps->omp_comm_size; 1287 PetscInt i, m, M, rstart; 1288 1289 PetscFunctionBegin; 1290 PetscCall(MatGetSize(A, &M, NULL)); 1291 PetscCall(MatGetLocalSize(A, &m, NULL)); 1292 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1293 if (ompsize == 1) { 1294 if (!mumps->irhs_loc) { 1295 mumps->nloc_rhs = m; 1296 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1297 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1298 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1299 } 1300 mumps->id.rhs_loc = (MumpsScalar *)array; 1301 } else { 1302 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1303 const PetscInt *ranges; 1304 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1305 MPI_Group petsc_group, omp_group; 1306 PetscScalar *recvbuf = NULL; 1307 1308 if (mumps->is_omp_master) { 1309 /* Lazily initialize the omp stuff for distributed rhs */ 1310 if (!mumps->irhs_loc) { 1311 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1312 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1313 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1314 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1315 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1316 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1317 1318 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1319 mumps->nloc_rhs = 0; 1320 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1321 for (j = 0; j < ompsize; j++) { 1322 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1323 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1324 } 1325 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1326 for (j = k = 0; j < ompsize; j++) { 1327 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1328 } 1329 1330 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1331 PetscCallMPI(MPI_Group_free(&petsc_group)); 1332 PetscCallMPI(MPI_Group_free(&omp_group)); 1333 } 1334 1335 /* Realloc buffers when current nrhs is bigger than what we have met */ 1336 if (nrhs > mumps->max_nrhs) { 1337 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1338 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1339 mumps->max_nrhs = nrhs; 1340 } 1341 1342 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1343 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1344 mumps->rhs_disps[0] = 0; 1345 for (j = 1; j < ompsize; j++) { 1346 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1347 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1348 } 1349 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1350 } 1351 1352 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1353 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1354 1355 if (mumps->is_omp_master) { 1356 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1357 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1358 for (j = 0; j < ompsize; j++) { 1359 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1360 dst = dstbase; 1361 for (i = 0; i < nrhs; i++) { 1362 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1363 src += mumps->rhs_nrow[j]; 1364 dst += mumps->nloc_rhs; 1365 } 1366 dstbase += mumps->rhs_nrow[j]; 1367 } 1368 } 1369 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1370 } 1371 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1372 } 1373 mumps->id.nrhs = nrhs; 1374 mumps->id.nloc_rhs = mumps->nloc_rhs; 1375 mumps->id.lrhs_loc = mumps->nloc_rhs; 1376 mumps->id.irhs_loc = mumps->irhs_loc; 1377 PetscFunctionReturn(PETSC_SUCCESS); 1378 } 1379 1380 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1381 { 1382 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1383 const PetscScalar *rarray = NULL; 1384 PetscScalar *array; 1385 IS is_iden, is_petsc; 1386 PetscInt i; 1387 PetscBool second_solve = PETSC_FALSE; 1388 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1389 1390 PetscFunctionBegin; 1391 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1392 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1393 &cite1)); 1394 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1395 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1396 &cite2)); 1397 1398 if (A->factorerrortype) { 1399 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1400 PetscCall(VecSetInf(x)); 1401 PetscFunctionReturn(PETSC_SUCCESS); 1402 } 1403 1404 mumps->id.nrhs = 1; 1405 if (mumps->petsc_size > 1) { 1406 if (mumps->ICNTL20 == 10) { 1407 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1408 PetscCall(VecGetArrayRead(b, &rarray)); 1409 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1410 } else { 1411 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1412 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1413 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1414 if (!mumps->myid) { 1415 PetscCall(VecGetArray(mumps->b_seq, &array)); 1416 mumps->id.rhs = (MumpsScalar *)array; 1417 } 1418 } 1419 } else { /* petsc_size == 1 */ 1420 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1421 PetscCall(VecCopy(b, x)); 1422 PetscCall(VecGetArray(x, &array)); 1423 mumps->id.rhs = (MumpsScalar *)array; 1424 } 1425 1426 /* 1427 handle condensation step of Schur complement (if any) 1428 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1429 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1430 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1431 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1432 */ 1433 if (mumps->id.size_schur > 0) { 1434 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1435 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1436 second_solve = PETSC_TRUE; 1437 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1438 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1439 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1440 } 1441 /* solve phase */ 1442 mumps->id.job = JOB_SOLVE; 1443 PetscMUMPS_c(mumps); 1444 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1445 1446 /* handle expansion step of Schur complement (if any) */ 1447 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1448 else if (mumps->id.ICNTL(26) == 1) { 1449 PetscCall(MatMumpsSolveSchur_Private(A)); 1450 for (i = 0; i < mumps->id.size_schur; ++i) { 1451 #if !defined(PETSC_USE_COMPLEX) 1452 PetscScalar val = mumps->id.redrhs[i]; 1453 #else 1454 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1455 #endif 1456 array[mumps->id.listvar_schur[i] - 1] = val; 1457 } 1458 } 1459 1460 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1461 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1462 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1463 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1464 } 1465 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1466 PetscInt *isol2_loc = NULL; 1467 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1468 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1469 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1470 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1471 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1472 PetscCall(ISDestroy(&is_iden)); 1473 PetscCall(ISDestroy(&is_petsc)); 1474 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1475 } 1476 1477 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1478 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1479 } 1480 1481 if (mumps->petsc_size > 1) { 1482 if (mumps->ICNTL20 == 10) { 1483 PetscCall(VecRestoreArrayRead(b, &rarray)); 1484 } else if (!mumps->myid) { 1485 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1486 } 1487 } else PetscCall(VecRestoreArray(x, &array)); 1488 1489 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1490 PetscFunctionReturn(PETSC_SUCCESS); 1491 } 1492 1493 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1494 { 1495 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1496 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1497 1498 PetscFunctionBegin; 1499 mumps->id.ICNTL(9) = 0; 1500 PetscCall(MatSolve_MUMPS(A, b, x)); 1501 mumps->id.ICNTL(9) = value; 1502 PetscFunctionReturn(PETSC_SUCCESS); 1503 } 1504 1505 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1506 { 1507 Mat Bt = NULL; 1508 PetscBool denseX, denseB, flg, flgT; 1509 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1510 PetscInt i, nrhs, M; 1511 PetscScalar *array; 1512 const PetscScalar *rbray; 1513 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1514 PetscMUMPSInt *isol_loc, *isol_loc_save; 1515 PetscScalar *bray, *sol_loc, *sol_loc_save; 1516 IS is_to, is_from; 1517 PetscInt k, proc, j, m, myrstart; 1518 const PetscInt *rstart; 1519 Vec v_mpi, msol_loc; 1520 VecScatter scat_sol; 1521 Vec b_seq; 1522 VecScatter scat_rhs; 1523 PetscScalar *aa; 1524 PetscInt spnr, *ia, *ja; 1525 Mat_MPIAIJ *b = NULL; 1526 1527 PetscFunctionBegin; 1528 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1529 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1530 1531 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1532 if (denseB) { 1533 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1534 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1535 } else { /* sparse B */ 1536 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1537 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1538 if (flgT) { /* input B is transpose of actual RHS matrix, 1539 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1540 PetscCall(MatTransposeGetMat(B, &Bt)); 1541 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1542 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1543 } 1544 1545 PetscCall(MatGetSize(B, &M, &nrhs)); 1546 mumps->id.nrhs = nrhs; 1547 mumps->id.lrhs = M; 1548 mumps->id.rhs = NULL; 1549 1550 if (mumps->petsc_size == 1) { 1551 PetscScalar *aa; 1552 PetscInt spnr, *ia, *ja; 1553 PetscBool second_solve = PETSC_FALSE; 1554 1555 PetscCall(MatDenseGetArray(X, &array)); 1556 mumps->id.rhs = (MumpsScalar *)array; 1557 1558 if (denseB) { 1559 /* copy B to X */ 1560 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1561 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1562 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1563 } else { /* sparse B */ 1564 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1565 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1566 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1567 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1568 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1569 } 1570 /* handle condensation step of Schur complement (if any) */ 1571 if (mumps->id.size_schur > 0) { 1572 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1573 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1574 second_solve = PETSC_TRUE; 1575 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1576 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1577 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1578 } 1579 /* solve phase */ 1580 mumps->id.job = JOB_SOLVE; 1581 PetscMUMPS_c(mumps); 1582 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1583 1584 /* handle expansion step of Schur complement (if any) */ 1585 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1586 else if (mumps->id.ICNTL(26) == 1) { 1587 PetscCall(MatMumpsSolveSchur_Private(A)); 1588 for (j = 0; j < nrhs; ++j) 1589 for (i = 0; i < mumps->id.size_schur; ++i) { 1590 #if !defined(PETSC_USE_COMPLEX) 1591 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1592 #else 1593 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1594 #endif 1595 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1596 } 1597 } 1598 if (!denseB) { /* sparse B */ 1599 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1600 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1601 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1602 } 1603 PetscCall(MatDenseRestoreArray(X, &array)); 1604 PetscFunctionReturn(PETSC_SUCCESS); 1605 } 1606 1607 /* parallel case: MUMPS requires rhs B to be centralized on the host! */ 1608 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1609 1610 /* create msol_loc to hold mumps local solution */ 1611 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1612 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1613 1614 lsol_loc = mumps->id.lsol_loc; 1615 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1616 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1617 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1618 mumps->id.isol_loc = isol_loc; 1619 1620 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1621 1622 if (denseB) { 1623 if (mumps->ICNTL20 == 10) { 1624 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1625 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1626 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1627 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1628 PetscCall(MatGetLocalSize(B, &m, NULL)); 1629 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1630 } else { 1631 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1632 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1633 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1634 0, re-arrange B into desired order, which is a local operation. 1635 */ 1636 1637 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1638 /* wrap dense rhs matrix B into a vector v_mpi */ 1639 PetscCall(MatGetLocalSize(B, &m, NULL)); 1640 PetscCall(MatDenseGetArray(B, &bray)); 1641 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1642 PetscCall(MatDenseRestoreArray(B, &bray)); 1643 1644 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1645 if (!mumps->myid) { 1646 PetscInt *idx; 1647 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1648 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1649 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1650 k = 0; 1651 for (proc = 0; proc < mumps->petsc_size; proc++) { 1652 for (j = 0; j < nrhs; j++) { 1653 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1654 } 1655 } 1656 1657 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1658 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1659 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1660 } else { 1661 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1662 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1663 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1664 } 1665 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1666 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1667 PetscCall(ISDestroy(&is_to)); 1668 PetscCall(ISDestroy(&is_from)); 1669 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1670 1671 if (!mumps->myid) { /* define rhs on the host */ 1672 PetscCall(VecGetArray(b_seq, &bray)); 1673 mumps->id.rhs = (MumpsScalar *)bray; 1674 PetscCall(VecRestoreArray(b_seq, &bray)); 1675 } 1676 } 1677 } else { /* sparse B */ 1678 b = (Mat_MPIAIJ *)Bt->data; 1679 1680 /* wrap dense X into a vector v_mpi */ 1681 PetscCall(MatGetLocalSize(X, &m, NULL)); 1682 PetscCall(MatDenseGetArray(X, &bray)); 1683 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1684 PetscCall(MatDenseRestoreArray(X, &bray)); 1685 1686 if (!mumps->myid) { 1687 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1688 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1689 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1690 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1691 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1692 } else { 1693 mumps->id.irhs_ptr = NULL; 1694 mumps->id.irhs_sparse = NULL; 1695 mumps->id.nz_rhs = 0; 1696 mumps->id.rhs_sparse = NULL; 1697 } 1698 } 1699 1700 /* solve phase */ 1701 mumps->id.job = JOB_SOLVE; 1702 PetscMUMPS_c(mumps); 1703 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1704 1705 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1706 PetscCall(MatDenseGetArray(X, &array)); 1707 PetscCall(VecPlaceArray(v_mpi, array)); 1708 1709 /* create scatter scat_sol */ 1710 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1711 /* iidx: index for scatter mumps solution to petsc X */ 1712 1713 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1714 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1715 for (i = 0; i < lsol_loc; i++) { 1716 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1717 1718 for (proc = 0; proc < mumps->petsc_size; proc++) { 1719 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1720 myrstart = rstart[proc]; 1721 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1722 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1723 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1724 break; 1725 } 1726 } 1727 1728 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1729 } 1730 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1731 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1732 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1733 PetscCall(ISDestroy(&is_from)); 1734 PetscCall(ISDestroy(&is_to)); 1735 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1736 PetscCall(MatDenseRestoreArray(X, &array)); 1737 1738 /* free spaces */ 1739 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1740 mumps->id.isol_loc = isol_loc_save; 1741 1742 PetscCall(PetscFree2(sol_loc, isol_loc)); 1743 PetscCall(PetscFree(idxx)); 1744 PetscCall(VecDestroy(&msol_loc)); 1745 PetscCall(VecDestroy(&v_mpi)); 1746 if (!denseB) { 1747 if (!mumps->myid) { 1748 b = (Mat_MPIAIJ *)Bt->data; 1749 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1750 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1751 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1752 } 1753 } else { 1754 if (mumps->ICNTL20 == 0) { 1755 PetscCall(VecDestroy(&b_seq)); 1756 PetscCall(VecScatterDestroy(&scat_rhs)); 1757 } 1758 } 1759 PetscCall(VecScatterDestroy(&scat_sol)); 1760 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1761 PetscFunctionReturn(PETSC_SUCCESS); 1762 } 1763 1764 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1765 { 1766 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1767 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1768 1769 PetscFunctionBegin; 1770 mumps->id.ICNTL(9) = 0; 1771 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1772 mumps->id.ICNTL(9) = value; 1773 PetscFunctionReturn(PETSC_SUCCESS); 1774 } 1775 1776 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1777 { 1778 PetscBool flg; 1779 Mat B; 1780 1781 PetscFunctionBegin; 1782 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1783 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1784 1785 /* Create B=Bt^T that uses Bt's data structure */ 1786 PetscCall(MatCreateTranspose(Bt, &B)); 1787 1788 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1789 PetscCall(MatDestroy(&B)); 1790 PetscFunctionReturn(PETSC_SUCCESS); 1791 } 1792 1793 #if !defined(PETSC_USE_COMPLEX) 1794 /* 1795 input: 1796 F: numeric factor 1797 output: 1798 nneg: total number of negative pivots 1799 nzero: total number of zero pivots 1800 npos: (global dimension of F) - nneg - nzero 1801 */ 1802 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1803 { 1804 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1805 PetscMPIInt size; 1806 1807 PetscFunctionBegin; 1808 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1809 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1810 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1811 1812 if (nneg) *nneg = mumps->id.INFOG(12); 1813 if (nzero || npos) { 1814 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1815 if (nzero) *nzero = mumps->id.INFOG(28); 1816 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1817 } 1818 PetscFunctionReturn(PETSC_SUCCESS); 1819 } 1820 #endif 1821 1822 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1823 { 1824 PetscInt i, nreqs; 1825 PetscMUMPSInt *irn, *jcn; 1826 PetscMPIInt count; 1827 PetscInt64 totnnz, remain; 1828 const PetscInt osize = mumps->omp_comm_size; 1829 PetscScalar *val; 1830 1831 PetscFunctionBegin; 1832 if (osize > 1) { 1833 if (reuse == MAT_INITIAL_MATRIX) { 1834 /* master first gathers counts of nonzeros to receive */ 1835 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1836 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1837 1838 /* Then each computes number of send/recvs */ 1839 if (mumps->is_omp_master) { 1840 /* Start from 1 since self communication is not done in MPI */ 1841 nreqs = 0; 1842 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1843 } else { 1844 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1845 } 1846 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1847 1848 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1849 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1850 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1851 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1852 */ 1853 nreqs = 0; /* counter for actual send/recvs */ 1854 if (mumps->is_omp_master) { 1855 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1856 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1857 PetscCall(PetscMalloc1(totnnz, &val)); 1858 1859 /* Self communication */ 1860 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1861 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1862 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1863 1864 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1865 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1866 PetscCall(PetscFree(mumps->val_alloc)); 1867 mumps->nnz = totnnz; 1868 mumps->irn = irn; 1869 mumps->jcn = jcn; 1870 mumps->val = mumps->val_alloc = val; 1871 1872 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1873 jcn += mumps->recvcount[0]; 1874 val += mumps->recvcount[0]; 1875 1876 /* Remote communication */ 1877 for (i = 1; i < osize; i++) { 1878 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1879 remain = mumps->recvcount[i] - count; 1880 while (count > 0) { 1881 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1882 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1883 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1884 irn += count; 1885 jcn += count; 1886 val += count; 1887 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1888 remain -= count; 1889 } 1890 } 1891 } else { 1892 irn = mumps->irn; 1893 jcn = mumps->jcn; 1894 val = mumps->val; 1895 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1896 remain = mumps->nnz - count; 1897 while (count > 0) { 1898 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1899 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1900 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1901 irn += count; 1902 jcn += count; 1903 val += count; 1904 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1905 remain -= count; 1906 } 1907 } 1908 } else { 1909 nreqs = 0; 1910 if (mumps->is_omp_master) { 1911 val = mumps->val + mumps->recvcount[0]; 1912 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1913 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1914 remain = mumps->recvcount[i] - count; 1915 while (count > 0) { 1916 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1917 val += count; 1918 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1919 remain -= count; 1920 } 1921 } 1922 } else { 1923 val = mumps->val; 1924 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1925 remain = mumps->nnz - count; 1926 while (count > 0) { 1927 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1928 val += count; 1929 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1930 remain -= count; 1931 } 1932 } 1933 } 1934 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1935 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1936 } 1937 PetscFunctionReturn(PETSC_SUCCESS); 1938 } 1939 1940 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info) 1941 { 1942 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1943 PetscBool isMPIAIJ; 1944 1945 PetscFunctionBegin; 1946 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1947 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1948 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1949 PetscFunctionReturn(PETSC_SUCCESS); 1950 } 1951 1952 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1953 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1954 1955 /* numerical factorization phase */ 1956 mumps->id.job = JOB_FACTNUMERIC; 1957 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1958 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 1959 } else { 1960 mumps->id.a_loc = (MumpsScalar *)mumps->val; 1961 } 1962 PetscMUMPS_c(mumps); 1963 if (mumps->id.INFOG(1) < 0) { 1964 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 1965 if (mumps->id.INFOG(1) == -10) { 1966 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1967 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1968 } else if (mumps->id.INFOG(1) == -13) { 1969 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1970 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1971 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1972 PetscCall(PetscInfo(F, "MUMPS error in numerical factorizatione: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1973 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1974 } else { 1975 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1976 F->factorerrortype = MAT_FACTOR_OTHER; 1977 } 1978 } 1979 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 1980 1981 F->assembled = PETSC_TRUE; 1982 1983 if (F->schur) { /* reset Schur status to unfactored */ 1984 #if defined(PETSC_HAVE_CUDA) 1985 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1986 #endif 1987 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1988 mumps->id.ICNTL(19) = 2; 1989 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1990 } 1991 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1992 } 1993 1994 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1995 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1996 1997 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1998 if (mumps->petsc_size > 1) { 1999 PetscInt lsol_loc; 2000 PetscScalar *sol_loc; 2001 2002 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 2003 2004 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2005 if (mumps->x_seq) { 2006 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2007 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2008 PetscCall(VecDestroy(&mumps->x_seq)); 2009 } 2010 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2011 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2012 mumps->id.lsol_loc = lsol_loc; 2013 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2014 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2015 } 2016 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 2017 PetscFunctionReturn(PETSC_SUCCESS); 2018 } 2019 2020 /* Sets MUMPS options from the options database */ 2021 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2022 { 2023 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2024 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2025 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2026 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2027 MumpsScalar *arr; 2028 2029 PetscFunctionBegin; 2030 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2031 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2032 PetscInt nthreads = 0; 2033 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2034 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2035 2036 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2037 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2038 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2039 2040 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2041 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2042 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2043 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2044 if (mumps->use_petsc_omp_support) { 2045 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2046 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2047 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2048 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2049 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2050 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2051 #endif 2052 } else { 2053 mumps->omp_comm = PETSC_COMM_SELF; 2054 mumps->mumps_comm = mumps->petsc_comm; 2055 mumps->is_omp_master = PETSC_TRUE; 2056 } 2057 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2058 mumps->reqs = NULL; 2059 mumps->tag = 0; 2060 2061 if (mumps->mumps_comm != MPI_COMM_NULL) { 2062 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2063 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2064 MPI_Comm comm; 2065 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2066 mumps->mumps_comm = comm; 2067 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2068 } 2069 2070 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2071 mumps->id.job = JOB_INIT; 2072 mumps->id.par = 1; /* host participates factorizaton and solve */ 2073 mumps->id.sym = mumps->sym; 2074 2075 size = mumps->id.size_schur; 2076 arr = mumps->id.schur; 2077 listvar_schur = mumps->id.listvar_schur; 2078 PetscMUMPS_c(mumps); 2079 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2080 /* restore cached ICNTL and CNTL values */ 2081 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2082 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2083 PetscCall(PetscFree(mumps->ICNTL_pre)); 2084 PetscCall(PetscFree(mumps->CNTL_pre)); 2085 2086 if (schur) { 2087 mumps->id.size_schur = size; 2088 mumps->id.schur_lld = size; 2089 mumps->id.schur = arr; 2090 mumps->id.listvar_schur = listvar_schur; 2091 if (mumps->petsc_size > 1) { 2092 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2093 2094 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2095 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2096 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2097 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2098 } else { 2099 if (F->factortype == MAT_FACTOR_LU) { 2100 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2101 } else { 2102 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2103 } 2104 } 2105 mumps->id.ICNTL(26) = -1; 2106 } 2107 2108 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2109 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 2110 */ 2111 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2112 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2113 2114 mumps->scat_rhs = NULL; 2115 mumps->scat_sol = NULL; 2116 2117 /* set PETSc-MUMPS default options - override MUMPS default */ 2118 mumps->id.ICNTL(3) = 0; 2119 mumps->id.ICNTL(4) = 0; 2120 if (mumps->petsc_size == 1) { 2121 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2122 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2123 } else { 2124 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2125 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2126 } 2127 } 2128 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2129 if (flg) mumps->id.ICNTL(1) = icntl; 2130 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2131 if (flg) mumps->id.ICNTL(2) = icntl; 2132 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2133 if (flg) mumps->id.ICNTL(3) = icntl; 2134 2135 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2136 if (flg) mumps->id.ICNTL(4) = icntl; 2137 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2138 2139 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2140 if (flg) mumps->id.ICNTL(6) = icntl; 2141 2142 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2143 if (flg) { 2144 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2145 mumps->id.ICNTL(7) = icntl; 2146 } 2147 2148 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2149 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2150 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2151 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2152 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2153 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2154 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2155 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2156 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2157 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2158 if (flg) { 2159 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2160 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2161 } 2162 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2163 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2164 PetscCall(MatDestroy(&F->schur)); 2165 PetscCall(MatMumpsResetSchur_Private(mumps)); 2166 } 2167 2168 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2169 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2170 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2171 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2172 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2173 In short, we could not use distributed RHS with MPICH until v4.0b1. 2174 */ 2175 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2176 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 2177 #else 2178 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 2179 #endif 2180 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2181 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2182 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2183 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2184 #endif 2185 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2186 2187 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2188 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2189 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2190 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2191 2192 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2193 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2194 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2195 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2196 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2197 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2198 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2199 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2200 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2201 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2202 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2203 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2204 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2205 2206 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2207 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2208 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2209 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2210 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2211 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2212 2213 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2214 2215 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2216 if (ninfo) { 2217 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2218 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2219 mumps->ninfo = ninfo; 2220 for (i = 0; i < ninfo; i++) { 2221 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2222 mumps->info[i] = info[i]; 2223 } 2224 } 2225 PetscOptionsEnd(); 2226 PetscFunctionReturn(PETSC_SUCCESS); 2227 } 2228 2229 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2230 { 2231 PetscFunctionBegin; 2232 if (mumps->id.INFOG(1) < 0) { 2233 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2234 if (mumps->id.INFOG(1) == -6) { 2235 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2236 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2237 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2238 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2239 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2240 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2241 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2242 } else { 2243 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2244 F->factorerrortype = MAT_FACTOR_OTHER; 2245 } 2246 } 2247 PetscFunctionReturn(PETSC_SUCCESS); 2248 } 2249 2250 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2251 { 2252 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2253 Vec b; 2254 const PetscInt M = A->rmap->N; 2255 2256 PetscFunctionBegin; 2257 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2258 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2259 PetscFunctionReturn(PETSC_SUCCESS); 2260 } 2261 2262 /* Set MUMPS options from the options database */ 2263 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2264 2265 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2266 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2267 2268 /* analysis phase */ 2269 mumps->id.job = JOB_FACTSYMBOLIC; 2270 mumps->id.n = M; 2271 switch (mumps->id.ICNTL(18)) { 2272 case 0: /* centralized assembled matrix input */ 2273 if (!mumps->myid) { 2274 mumps->id.nnz = mumps->nnz; 2275 mumps->id.irn = mumps->irn; 2276 mumps->id.jcn = mumps->jcn; 2277 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2278 if (r) { 2279 mumps->id.ICNTL(7) = 1; 2280 if (!mumps->myid) { 2281 const PetscInt *idx; 2282 PetscInt i; 2283 2284 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2285 PetscCall(ISGetIndices(r, &idx)); 2286 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2287 PetscCall(ISRestoreIndices(r, &idx)); 2288 } 2289 } 2290 } 2291 break; 2292 case 3: /* distributed assembled matrix input (size>1) */ 2293 mumps->id.nnz_loc = mumps->nnz; 2294 mumps->id.irn_loc = mumps->irn; 2295 mumps->id.jcn_loc = mumps->jcn; 2296 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2297 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2298 PetscCall(MatCreateVecs(A, NULL, &b)); 2299 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2300 PetscCall(VecDestroy(&b)); 2301 } 2302 break; 2303 } 2304 PetscMUMPS_c(mumps); 2305 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2306 2307 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2308 F->ops->solve = MatSolve_MUMPS; 2309 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2310 F->ops->matsolve = MatMatSolve_MUMPS; 2311 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2312 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2313 2314 mumps->matstruc = SAME_NONZERO_PATTERN; 2315 PetscFunctionReturn(PETSC_SUCCESS); 2316 } 2317 2318 /* Note the Petsc r and c permutations are ignored */ 2319 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2320 { 2321 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2322 Vec b; 2323 const PetscInt M = A->rmap->N; 2324 2325 PetscFunctionBegin; 2326 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2327 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2328 PetscFunctionReturn(PETSC_SUCCESS); 2329 } 2330 2331 /* Set MUMPS options from the options database */ 2332 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2333 2334 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2335 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2336 2337 /* analysis phase */ 2338 mumps->id.job = JOB_FACTSYMBOLIC; 2339 mumps->id.n = M; 2340 switch (mumps->id.ICNTL(18)) { 2341 case 0: /* centralized assembled matrix input */ 2342 if (!mumps->myid) { 2343 mumps->id.nnz = mumps->nnz; 2344 mumps->id.irn = mumps->irn; 2345 mumps->id.jcn = mumps->jcn; 2346 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2347 } 2348 break; 2349 case 3: /* distributed assembled matrix input (size>1) */ 2350 mumps->id.nnz_loc = mumps->nnz; 2351 mumps->id.irn_loc = mumps->irn; 2352 mumps->id.jcn_loc = mumps->jcn; 2353 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2354 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2355 PetscCall(MatCreateVecs(A, NULL, &b)); 2356 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2357 PetscCall(VecDestroy(&b)); 2358 } 2359 break; 2360 } 2361 PetscMUMPS_c(mumps); 2362 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2363 2364 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2365 F->ops->solve = MatSolve_MUMPS; 2366 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2367 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2368 2369 mumps->matstruc = SAME_NONZERO_PATTERN; 2370 PetscFunctionReturn(PETSC_SUCCESS); 2371 } 2372 2373 /* Note the Petsc r permutation and factor info are ignored */ 2374 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2375 { 2376 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2377 Vec b; 2378 const PetscInt M = A->rmap->N; 2379 2380 PetscFunctionBegin; 2381 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2382 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2383 PetscFunctionReturn(PETSC_SUCCESS); 2384 } 2385 2386 /* Set MUMPS options from the options database */ 2387 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2388 2389 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2390 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2391 2392 /* analysis phase */ 2393 mumps->id.job = JOB_FACTSYMBOLIC; 2394 mumps->id.n = M; 2395 switch (mumps->id.ICNTL(18)) { 2396 case 0: /* centralized assembled matrix input */ 2397 if (!mumps->myid) { 2398 mumps->id.nnz = mumps->nnz; 2399 mumps->id.irn = mumps->irn; 2400 mumps->id.jcn = mumps->jcn; 2401 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2402 } 2403 break; 2404 case 3: /* distributed assembled matrix input (size>1) */ 2405 mumps->id.nnz_loc = mumps->nnz; 2406 mumps->id.irn_loc = mumps->irn; 2407 mumps->id.jcn_loc = mumps->jcn; 2408 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2409 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2410 PetscCall(MatCreateVecs(A, NULL, &b)); 2411 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2412 PetscCall(VecDestroy(&b)); 2413 } 2414 break; 2415 } 2416 PetscMUMPS_c(mumps); 2417 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2418 2419 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2420 F->ops->solve = MatSolve_MUMPS; 2421 F->ops->solvetranspose = MatSolve_MUMPS; 2422 F->ops->matsolve = MatMatSolve_MUMPS; 2423 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2424 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2425 #if defined(PETSC_USE_COMPLEX) 2426 F->ops->getinertia = NULL; 2427 #else 2428 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2429 #endif 2430 2431 mumps->matstruc = SAME_NONZERO_PATTERN; 2432 PetscFunctionReturn(PETSC_SUCCESS); 2433 } 2434 2435 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2436 { 2437 PetscBool iascii; 2438 PetscViewerFormat format; 2439 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2440 2441 PetscFunctionBegin; 2442 /* check if matrix is mumps type */ 2443 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2444 2445 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2446 if (iascii) { 2447 PetscCall(PetscViewerGetFormat(viewer, &format)); 2448 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2449 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2450 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2451 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2452 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2453 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2454 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2455 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2456 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2457 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2458 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2459 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2460 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2461 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2462 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2463 if (mumps->id.ICNTL(11) > 0) { 2464 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2465 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2466 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2467 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2468 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2469 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2470 } 2471 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2472 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2473 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2474 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2475 /* ICNTL(15-17) not used */ 2476 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2477 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2478 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2479 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2480 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2481 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2482 2483 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2484 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2485 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2486 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2487 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2488 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2489 2490 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2491 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2492 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2493 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2494 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2495 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2496 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2497 2498 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2499 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2500 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2501 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2502 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2503 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2504 2505 /* information local to each processor */ 2506 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2507 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2508 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2509 PetscCall(PetscViewerFlush(viewer)); 2510 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2511 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2512 PetscCall(PetscViewerFlush(viewer)); 2513 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2514 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2515 PetscCall(PetscViewerFlush(viewer)); 2516 2517 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2518 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2519 PetscCall(PetscViewerFlush(viewer)); 2520 2521 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2522 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2523 PetscCall(PetscViewerFlush(viewer)); 2524 2525 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2526 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2527 PetscCall(PetscViewerFlush(viewer)); 2528 2529 if (mumps->ninfo && mumps->ninfo <= 80) { 2530 PetscInt i; 2531 for (i = 0; i < mumps->ninfo; i++) { 2532 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2533 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2534 PetscCall(PetscViewerFlush(viewer)); 2535 } 2536 } 2537 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2538 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : "")); 2539 2540 if (mumps->myid == 0) { /* information from the host */ 2541 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2542 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2543 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2544 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2545 2546 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2547 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2548 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2549 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2550 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2551 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2552 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2553 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2554 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2555 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2556 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2557 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2558 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2559 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2560 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2561 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2562 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2563 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 2564 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2565 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2566 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2567 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2568 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2569 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2570 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2571 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2572 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2573 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2574 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2575 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2576 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2577 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2578 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2579 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2580 } 2581 } 2582 } 2583 PetscFunctionReturn(PETSC_SUCCESS); 2584 } 2585 2586 static PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2587 { 2588 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2589 2590 PetscFunctionBegin; 2591 info->block_size = 1.0; 2592 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2593 info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2594 info->nz_unneeded = 0.0; 2595 info->assemblies = 0.0; 2596 info->mallocs = 0.0; 2597 info->memory = 0.0; 2598 info->fill_ratio_given = 0; 2599 info->fill_ratio_needed = 0; 2600 info->factor_mallocs = 0; 2601 PetscFunctionReturn(PETSC_SUCCESS); 2602 } 2603 2604 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2605 { 2606 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2607 const PetscScalar *arr; 2608 const PetscInt *idxs; 2609 PetscInt size, i; 2610 2611 PetscFunctionBegin; 2612 PetscCall(ISGetLocalSize(is, &size)); 2613 /* Schur complement matrix */ 2614 PetscCall(MatDestroy(&F->schur)); 2615 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2616 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2617 mumps->id.schur = (MumpsScalar *)arr; 2618 mumps->id.size_schur = size; 2619 mumps->id.schur_lld = size; 2620 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2621 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2622 2623 /* MUMPS expects Fortran style indices */ 2624 PetscCall(PetscFree(mumps->id.listvar_schur)); 2625 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2626 PetscCall(ISGetIndices(is, &idxs)); 2627 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2628 PetscCall(ISRestoreIndices(is, &idxs)); 2629 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2630 mumps->id.ICNTL(26) = -1; 2631 PetscFunctionReturn(PETSC_SUCCESS); 2632 } 2633 2634 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2635 { 2636 Mat St; 2637 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2638 PetscScalar *array; 2639 2640 PetscFunctionBegin; 2641 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it"); 2642 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2643 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2644 PetscCall(MatSetType(St, MATDENSE)); 2645 PetscCall(MatSetUp(St)); 2646 PetscCall(MatDenseGetArray(St, &array)); 2647 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2648 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2649 PetscInt i, j, N = mumps->id.size_schur; 2650 for (i = 0; i < N; i++) { 2651 for (j = 0; j < N; j++) { 2652 #if !defined(PETSC_USE_COMPLEX) 2653 PetscScalar val = mumps->id.schur[i * N + j]; 2654 #else 2655 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2656 #endif 2657 array[j * N + i] = val; 2658 } 2659 } 2660 } else { /* stored by columns */ 2661 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2662 } 2663 } else { /* either full or lower-triangular (not packed) */ 2664 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2665 PetscInt i, j, N = mumps->id.size_schur; 2666 for (i = 0; i < N; i++) { 2667 for (j = i; j < N; j++) { 2668 #if !defined(PETSC_USE_COMPLEX) 2669 PetscScalar val = mumps->id.schur[i * N + j]; 2670 #else 2671 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2672 #endif 2673 array[i * N + j] = array[j * N + i] = val; 2674 } 2675 } 2676 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2677 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2678 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2679 PetscInt i, j, N = mumps->id.size_schur; 2680 for (i = 0; i < N; i++) { 2681 for (j = 0; j < i + 1; j++) { 2682 #if !defined(PETSC_USE_COMPLEX) 2683 PetscScalar val = mumps->id.schur[i * N + j]; 2684 #else 2685 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2686 #endif 2687 array[i * N + j] = array[j * N + i] = val; 2688 } 2689 } 2690 } 2691 } 2692 PetscCall(MatDenseRestoreArray(St, &array)); 2693 *S = St; 2694 PetscFunctionReturn(PETSC_SUCCESS); 2695 } 2696 2697 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2698 { 2699 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2700 2701 PetscFunctionBegin; 2702 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2703 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2704 for (i = 0; i < nICNTL_pre; ++i) 2705 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2706 if (i == nICNTL_pre) { /* not already cached */ 2707 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2708 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2709 mumps->ICNTL_pre[0]++; 2710 } 2711 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2712 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2713 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2714 PetscFunctionReturn(PETSC_SUCCESS); 2715 } 2716 2717 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2718 { 2719 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2720 2721 PetscFunctionBegin; 2722 if (mumps->id.job == JOB_NULL) { 2723 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2724 *ival = 0; 2725 for (i = 0; i < nICNTL_pre; ++i) { 2726 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2727 } 2728 } else *ival = mumps->id.ICNTL(icntl); 2729 PetscFunctionReturn(PETSC_SUCCESS); 2730 } 2731 2732 /*@ 2733 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2734 2735 Logically Collective 2736 2737 Input Parameters: 2738 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2739 . icntl - index of MUMPS parameter array ICNTL() 2740 - ival - value of MUMPS ICNTL(icntl) 2741 2742 Options Database Key: 2743 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2744 2745 Level: beginner 2746 2747 References: 2748 . * - MUMPS Users' Guide 2749 2750 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2751 @*/ 2752 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2753 { 2754 PetscFunctionBegin; 2755 PetscValidType(F, 1); 2756 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2757 PetscValidLogicalCollectiveInt(F, icntl, 2); 2758 PetscValidLogicalCollectiveInt(F, ival, 3); 2759 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2760 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2761 PetscFunctionReturn(PETSC_SUCCESS); 2762 } 2763 2764 /*@ 2765 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2766 2767 Logically Collective 2768 2769 Input Parameters: 2770 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2771 - icntl - index of MUMPS parameter array ICNTL() 2772 2773 Output Parameter: 2774 . ival - value of MUMPS ICNTL(icntl) 2775 2776 Level: beginner 2777 2778 References: 2779 . * - MUMPS Users' Guide 2780 2781 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2782 @*/ 2783 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2784 { 2785 PetscFunctionBegin; 2786 PetscValidType(F, 1); 2787 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2788 PetscValidLogicalCollectiveInt(F, icntl, 2); 2789 PetscAssertPointer(ival, 3); 2790 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2791 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2792 PetscFunctionReturn(PETSC_SUCCESS); 2793 } 2794 2795 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2796 { 2797 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2798 2799 PetscFunctionBegin; 2800 if (mumps->id.job == JOB_NULL) { 2801 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2802 for (i = 0; i < nCNTL_pre; ++i) 2803 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2804 if (i == nCNTL_pre) { 2805 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2806 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2807 mumps->CNTL_pre[0]++; 2808 } 2809 mumps->CNTL_pre[1 + 2 * i] = icntl; 2810 mumps->CNTL_pre[2 + 2 * i] = val; 2811 } else mumps->id.CNTL(icntl) = val; 2812 PetscFunctionReturn(PETSC_SUCCESS); 2813 } 2814 2815 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2816 { 2817 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2818 2819 PetscFunctionBegin; 2820 if (mumps->id.job == JOB_NULL) { 2821 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2822 *val = 0.0; 2823 for (i = 0; i < nCNTL_pre; ++i) { 2824 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2825 } 2826 } else *val = mumps->id.CNTL(icntl); 2827 PetscFunctionReturn(PETSC_SUCCESS); 2828 } 2829 2830 /*@ 2831 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2832 2833 Logically Collective 2834 2835 Input Parameters: 2836 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2837 . icntl - index of MUMPS parameter array CNTL() 2838 - val - value of MUMPS CNTL(icntl) 2839 2840 Options Database Key: 2841 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2842 2843 Level: beginner 2844 2845 References: 2846 . * - MUMPS Users' Guide 2847 2848 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2849 @*/ 2850 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2851 { 2852 PetscFunctionBegin; 2853 PetscValidType(F, 1); 2854 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2855 PetscValidLogicalCollectiveInt(F, icntl, 2); 2856 PetscValidLogicalCollectiveReal(F, val, 3); 2857 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2858 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2859 PetscFunctionReturn(PETSC_SUCCESS); 2860 } 2861 2862 /*@ 2863 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2864 2865 Logically Collective 2866 2867 Input Parameters: 2868 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2869 - icntl - index of MUMPS parameter array CNTL() 2870 2871 Output Parameter: 2872 . val - value of MUMPS CNTL(icntl) 2873 2874 Level: beginner 2875 2876 References: 2877 . * - MUMPS Users' Guide 2878 2879 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2880 @*/ 2881 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2882 { 2883 PetscFunctionBegin; 2884 PetscValidType(F, 1); 2885 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2886 PetscValidLogicalCollectiveInt(F, icntl, 2); 2887 PetscAssertPointer(val, 3); 2888 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2889 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2890 PetscFunctionReturn(PETSC_SUCCESS); 2891 } 2892 2893 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2894 { 2895 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2896 2897 PetscFunctionBegin; 2898 *info = mumps->id.INFO(icntl); 2899 PetscFunctionReturn(PETSC_SUCCESS); 2900 } 2901 2902 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2903 { 2904 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2905 2906 PetscFunctionBegin; 2907 *infog = mumps->id.INFOG(icntl); 2908 PetscFunctionReturn(PETSC_SUCCESS); 2909 } 2910 2911 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2912 { 2913 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2914 2915 PetscFunctionBegin; 2916 *rinfo = mumps->id.RINFO(icntl); 2917 PetscFunctionReturn(PETSC_SUCCESS); 2918 } 2919 2920 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2921 { 2922 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2923 2924 PetscFunctionBegin; 2925 *rinfog = mumps->id.RINFOG(icntl); 2926 PetscFunctionReturn(PETSC_SUCCESS); 2927 } 2928 2929 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2930 { 2931 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2932 2933 PetscFunctionBegin; 2934 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2935 *size = 0; 2936 *array = NULL; 2937 if (!mumps->myid) { 2938 *size = mumps->id.INFOG(28); 2939 PetscCall(PetscMalloc1(*size, array)); 2940 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2941 } 2942 PetscFunctionReturn(PETSC_SUCCESS); 2943 } 2944 2945 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2946 { 2947 Mat Bt = NULL, Btseq = NULL; 2948 PetscBool flg; 2949 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2950 PetscScalar *aa; 2951 PetscInt spnr, *ia, *ja, M, nrhs; 2952 2953 PetscFunctionBegin; 2954 PetscAssertPointer(spRHS, 2); 2955 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2956 if (flg) { 2957 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2958 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2959 2960 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2961 2962 if (mumps->petsc_size > 1) { 2963 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2964 Btseq = b->A; 2965 } else { 2966 Btseq = Bt; 2967 } 2968 2969 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2970 mumps->id.nrhs = nrhs; 2971 mumps->id.lrhs = M; 2972 mumps->id.rhs = NULL; 2973 2974 if (!mumps->myid) { 2975 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2976 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2977 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2978 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2979 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2980 } else { 2981 mumps->id.irhs_ptr = NULL; 2982 mumps->id.irhs_sparse = NULL; 2983 mumps->id.nz_rhs = 0; 2984 mumps->id.rhs_sparse = NULL; 2985 } 2986 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2987 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2988 2989 /* solve phase */ 2990 mumps->id.job = JOB_SOLVE; 2991 PetscMUMPS_c(mumps); 2992 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2993 2994 if (!mumps->myid) { 2995 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2996 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2997 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2998 } 2999 PetscFunctionReturn(PETSC_SUCCESS); 3000 } 3001 3002 /*@ 3003 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 3004 3005 Logically Collective 3006 3007 Input Parameter: 3008 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3009 3010 Output Parameter: 3011 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3012 3013 Level: beginner 3014 3015 References: 3016 . * - MUMPS Users' Guide 3017 3018 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3019 @*/ 3020 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3021 { 3022 PetscFunctionBegin; 3023 PetscValidType(F, 1); 3024 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3025 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3026 PetscFunctionReturn(PETSC_SUCCESS); 3027 } 3028 3029 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3030 { 3031 Mat spRHS; 3032 3033 PetscFunctionBegin; 3034 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3035 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3036 PetscCall(MatDestroy(&spRHS)); 3037 PetscFunctionReturn(PETSC_SUCCESS); 3038 } 3039 3040 /*@ 3041 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 3042 3043 Logically Collective 3044 3045 Input Parameter: 3046 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3047 3048 Output Parameter: 3049 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3050 3051 Level: beginner 3052 3053 References: 3054 . * - MUMPS Users' Guide 3055 3056 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3057 @*/ 3058 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3059 { 3060 PetscBool flg; 3061 3062 PetscFunctionBegin; 3063 PetscValidType(F, 1); 3064 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3065 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3066 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3067 3068 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3069 PetscFunctionReturn(PETSC_SUCCESS); 3070 } 3071 3072 /*@ 3073 MatMumpsGetInfo - Get MUMPS parameter INFO() 3074 3075 Logically Collective 3076 3077 Input Parameters: 3078 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3079 - icntl - index of MUMPS parameter array INFO() 3080 3081 Output Parameter: 3082 . ival - value of MUMPS INFO(icntl) 3083 3084 Level: beginner 3085 3086 References: 3087 . * - MUMPS Users' Guide 3088 3089 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3090 @*/ 3091 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3092 { 3093 PetscFunctionBegin; 3094 PetscValidType(F, 1); 3095 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3096 PetscAssertPointer(ival, 3); 3097 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3098 PetscFunctionReturn(PETSC_SUCCESS); 3099 } 3100 3101 /*@ 3102 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3103 3104 Logically Collective 3105 3106 Input Parameters: 3107 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3108 - icntl - index of MUMPS parameter array INFOG() 3109 3110 Output Parameter: 3111 . ival - value of MUMPS INFOG(icntl) 3112 3113 Level: beginner 3114 3115 References: 3116 . * - MUMPS Users' Guide 3117 3118 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3119 @*/ 3120 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3121 { 3122 PetscFunctionBegin; 3123 PetscValidType(F, 1); 3124 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3125 PetscAssertPointer(ival, 3); 3126 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3127 PetscFunctionReturn(PETSC_SUCCESS); 3128 } 3129 3130 /*@ 3131 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3132 3133 Logically Collective 3134 3135 Input Parameters: 3136 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3137 - icntl - index of MUMPS parameter array RINFO() 3138 3139 Output Parameter: 3140 . val - value of MUMPS RINFO(icntl) 3141 3142 Level: beginner 3143 3144 References: 3145 . * - MUMPS Users' Guide 3146 3147 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3148 @*/ 3149 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3150 { 3151 PetscFunctionBegin; 3152 PetscValidType(F, 1); 3153 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3154 PetscAssertPointer(val, 3); 3155 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3156 PetscFunctionReturn(PETSC_SUCCESS); 3157 } 3158 3159 /*@ 3160 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3161 3162 Logically Collective 3163 3164 Input Parameters: 3165 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3166 - icntl - index of MUMPS parameter array RINFOG() 3167 3168 Output Parameter: 3169 . val - value of MUMPS RINFOG(icntl) 3170 3171 Level: beginner 3172 3173 References: 3174 . * - MUMPS Users' Guide 3175 3176 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3177 @*/ 3178 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3179 { 3180 PetscFunctionBegin; 3181 PetscValidType(F, 1); 3182 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3183 PetscAssertPointer(val, 3); 3184 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3185 PetscFunctionReturn(PETSC_SUCCESS); 3186 } 3187 3188 /*@ 3189 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3190 3191 Logically Collective 3192 3193 Input Parameter: 3194 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3195 3196 Output Parameters: 3197 + size - local size of the array. The size of the array is non-zero only on the host. 3198 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3199 for freeing this array. 3200 3201 Level: beginner 3202 3203 References: 3204 . * - MUMPS Users' Guide 3205 3206 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3207 @*/ 3208 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3209 { 3210 PetscFunctionBegin; 3211 PetscValidType(F, 1); 3212 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3213 PetscAssertPointer(size, 2); 3214 PetscAssertPointer(array, 3); 3215 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3216 PetscFunctionReturn(PETSC_SUCCESS); 3217 } 3218 3219 /*MC 3220 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3221 distributed and sequential matrices via the external package MUMPS. 3222 3223 Works with `MATAIJ` and `MATSBAIJ` matrices 3224 3225 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3226 3227 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3228 See details below. 3229 3230 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3231 3232 Options Database Keys: 3233 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3234 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 3235 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3236 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3237 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3238 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3239 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3240 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3241 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3242 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3243 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3244 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3245 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3246 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3247 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3248 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3249 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3250 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3251 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3252 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3253 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3254 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 3255 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3256 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3257 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3258 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3259 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3260 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3261 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3262 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3263 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3264 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3265 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3266 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3267 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3268 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3269 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3270 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3271 3272 Level: beginner 3273 3274 Notes: 3275 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 3276 error if the matrix is Hermitian. 3277 3278 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3279 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3280 3281 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3282 the failure with 3283 .vb 3284 KSPGetPC(ksp,&pc); 3285 PCFactorGetMatrix(pc,&mat); 3286 MatMumpsGetInfo(mat,....); 3287 MatMumpsGetInfog(mat,....); etc. 3288 .ve 3289 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3290 3291 MUMPS provides 64-bit integer support in two build modes: 3292 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3293 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3294 3295 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3296 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3297 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3298 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3299 3300 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3301 3302 Two modes to run MUMPS/PETSc with OpenMP 3303 .vb 3304 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3305 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3306 .ve 3307 3308 .vb 3309 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3310 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3311 .ve 3312 3313 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3314 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3315 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3316 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3317 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 3318 3319 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3320 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3321 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3322 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 3323 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3324 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 3325 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3326 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3327 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3328 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3329 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3330 examine the mapping result. 3331 3332 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3333 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3334 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3335 3336 References: 3337 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3338 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 3339 3340 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3341 M*/ 3342 3343 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3344 { 3345 PetscFunctionBegin; 3346 *type = MATSOLVERMUMPS; 3347 PetscFunctionReturn(PETSC_SUCCESS); 3348 } 3349 3350 /* MatGetFactor for Seq and MPI AIJ matrices */ 3351 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3352 { 3353 Mat B; 3354 Mat_MUMPS *mumps; 3355 PetscBool isSeqAIJ, isDiag; 3356 PetscMPIInt size; 3357 3358 PetscFunctionBegin; 3359 #if defined(PETSC_USE_COMPLEX) 3360 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3361 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3362 *F = NULL; 3363 PetscFunctionReturn(PETSC_SUCCESS); 3364 } 3365 #endif 3366 /* Create the factorization matrix */ 3367 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3368 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3369 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3370 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3371 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3372 PetscCall(MatSetUp(B)); 3373 3374 PetscCall(PetscNew(&mumps)); 3375 3376 B->ops->view = MatView_MUMPS; 3377 B->ops->getinfo = MatGetInfo_MUMPS; 3378 3379 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3380 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3381 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3382 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3383 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3384 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3385 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3386 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3387 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3388 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3389 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3390 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3391 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3392 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3393 3394 if (ftype == MAT_FACTOR_LU) { 3395 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3396 B->factortype = MAT_FACTOR_LU; 3397 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3398 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3399 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3400 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3401 mumps->sym = 0; 3402 } else { 3403 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3404 B->factortype = MAT_FACTOR_CHOLESKY; 3405 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3406 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3407 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3408 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3409 #if defined(PETSC_USE_COMPLEX) 3410 mumps->sym = 2; 3411 #else 3412 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3413 else mumps->sym = 2; 3414 #endif 3415 } 3416 3417 /* set solvertype */ 3418 PetscCall(PetscFree(B->solvertype)); 3419 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3420 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3421 if (size == 1) { 3422 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3423 B->canuseordering = PETSC_TRUE; 3424 } 3425 B->ops->destroy = MatDestroy_MUMPS; 3426 B->data = (void *)mumps; 3427 3428 *F = B; 3429 mumps->id.job = JOB_NULL; 3430 mumps->ICNTL_pre = NULL; 3431 mumps->CNTL_pre = NULL; 3432 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3433 PetscFunctionReturn(PETSC_SUCCESS); 3434 } 3435 3436 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3437 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3438 { 3439 Mat B; 3440 Mat_MUMPS *mumps; 3441 PetscBool isSeqSBAIJ; 3442 PetscMPIInt size; 3443 3444 PetscFunctionBegin; 3445 #if defined(PETSC_USE_COMPLEX) 3446 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3447 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3448 *F = NULL; 3449 PetscFunctionReturn(PETSC_SUCCESS); 3450 } 3451 #endif 3452 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3453 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3454 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3455 PetscCall(MatSetUp(B)); 3456 3457 PetscCall(PetscNew(&mumps)); 3458 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3459 if (isSeqSBAIJ) { 3460 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3461 } else { 3462 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3463 } 3464 3465 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3466 B->ops->view = MatView_MUMPS; 3467 B->ops->getinfo = MatGetInfo_MUMPS; 3468 3469 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3470 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3471 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3472 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3473 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3474 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3475 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3476 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3477 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3478 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3479 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3480 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3481 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3482 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3483 3484 B->factortype = MAT_FACTOR_CHOLESKY; 3485 #if defined(PETSC_USE_COMPLEX) 3486 mumps->sym = 2; 3487 #else 3488 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3489 else mumps->sym = 2; 3490 #endif 3491 3492 /* set solvertype */ 3493 PetscCall(PetscFree(B->solvertype)); 3494 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3495 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3496 if (size == 1) { 3497 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3498 B->canuseordering = PETSC_TRUE; 3499 } 3500 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3501 B->ops->destroy = MatDestroy_MUMPS; 3502 B->data = (void *)mumps; 3503 3504 *F = B; 3505 mumps->id.job = JOB_NULL; 3506 mumps->ICNTL_pre = NULL; 3507 mumps->CNTL_pre = NULL; 3508 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3509 PetscFunctionReturn(PETSC_SUCCESS); 3510 } 3511 3512 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3513 { 3514 Mat B; 3515 Mat_MUMPS *mumps; 3516 PetscBool isSeqBAIJ; 3517 PetscMPIInt size; 3518 3519 PetscFunctionBegin; 3520 /* Create the factorization matrix */ 3521 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3522 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3523 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3524 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3525 PetscCall(MatSetUp(B)); 3526 3527 PetscCall(PetscNew(&mumps)); 3528 if (ftype == MAT_FACTOR_LU) { 3529 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3530 B->factortype = MAT_FACTOR_LU; 3531 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3532 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3533 mumps->sym = 0; 3534 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3535 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3536 3537 B->ops->view = MatView_MUMPS; 3538 B->ops->getinfo = MatGetInfo_MUMPS; 3539 3540 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3541 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3542 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3543 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3544 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3545 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3546 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3547 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3548 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3549 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3550 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3551 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3552 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3553 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3554 3555 /* set solvertype */ 3556 PetscCall(PetscFree(B->solvertype)); 3557 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3558 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3559 if (size == 1) { 3560 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3561 B->canuseordering = PETSC_TRUE; 3562 } 3563 B->ops->destroy = MatDestroy_MUMPS; 3564 B->data = (void *)mumps; 3565 3566 *F = B; 3567 mumps->id.job = JOB_NULL; 3568 mumps->ICNTL_pre = NULL; 3569 mumps->CNTL_pre = NULL; 3570 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3571 PetscFunctionReturn(PETSC_SUCCESS); 3572 } 3573 3574 /* MatGetFactor for Seq and MPI SELL matrices */ 3575 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3576 { 3577 Mat B; 3578 Mat_MUMPS *mumps; 3579 PetscBool isSeqSELL; 3580 PetscMPIInt size; 3581 3582 PetscFunctionBegin; 3583 /* Create the factorization matrix */ 3584 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3585 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3586 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3587 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3588 PetscCall(MatSetUp(B)); 3589 3590 PetscCall(PetscNew(&mumps)); 3591 3592 B->ops->view = MatView_MUMPS; 3593 B->ops->getinfo = MatGetInfo_MUMPS; 3594 3595 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3596 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3597 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3598 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3599 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3600 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3601 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3602 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3603 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3604 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3605 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3606 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3607 3608 if (ftype == MAT_FACTOR_LU) { 3609 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3610 B->factortype = MAT_FACTOR_LU; 3611 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3612 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3613 mumps->sym = 0; 3614 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3615 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3616 3617 /* set solvertype */ 3618 PetscCall(PetscFree(B->solvertype)); 3619 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3620 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3621 if (size == 1) { 3622 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3623 B->canuseordering = PETSC_TRUE; 3624 } 3625 B->ops->destroy = MatDestroy_MUMPS; 3626 B->data = (void *)mumps; 3627 3628 *F = B; 3629 mumps->id.job = JOB_NULL; 3630 mumps->ICNTL_pre = NULL; 3631 mumps->CNTL_pre = NULL; 3632 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3633 PetscFunctionReturn(PETSC_SUCCESS); 3634 } 3635 3636 /* MatGetFactor for MATNEST matrices */ 3637 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3638 { 3639 Mat B, **mats; 3640 Mat_MUMPS *mumps; 3641 PetscInt nr, nc; 3642 PetscMPIInt size; 3643 PetscBool flg = PETSC_TRUE; 3644 3645 PetscFunctionBegin; 3646 #if defined(PETSC_USE_COMPLEX) 3647 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3648 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3649 *F = NULL; 3650 PetscFunctionReturn(PETSC_SUCCESS); 3651 } 3652 #endif 3653 3654 /* Return if some condition is not satisfied */ 3655 *F = NULL; 3656 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3657 if (ftype == MAT_FACTOR_CHOLESKY) { 3658 IS *rows, *cols; 3659 PetscInt *m, *M; 3660 3661 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3662 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3663 PetscCall(MatNestGetISs(A, rows, cols)); 3664 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3665 if (!flg) { 3666 PetscCall(PetscFree2(rows, cols)); 3667 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 3668 PetscFunctionReturn(PETSC_SUCCESS); 3669 } 3670 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3671 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3672 for (PetscInt r = 0; flg && r < nr; r++) 3673 for (PetscInt k = r + 1; flg && k < nr; k++) 3674 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3675 PetscCall(PetscFree2(m, M)); 3676 PetscCall(PetscFree2(rows, cols)); 3677 if (!flg) { 3678 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 3679 PetscFunctionReturn(PETSC_SUCCESS); 3680 } 3681 } 3682 3683 for (PetscInt r = 0; r < nr; r++) { 3684 for (PetscInt c = 0; c < nc; c++) { 3685 Mat sub = mats[r][c]; 3686 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag; 3687 3688 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3689 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 3690 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 3691 else { 3692 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 3693 if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 3694 } 3695 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3696 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3697 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3698 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3699 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3700 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3701 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 3702 if (ftype == MAT_FACTOR_CHOLESKY) { 3703 if (r == c) { 3704 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag) { 3705 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3706 flg = PETSC_FALSE; 3707 } 3708 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag) { 3709 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3710 flg = PETSC_FALSE; 3711 } 3712 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag) { 3713 PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3714 flg = PETSC_FALSE; 3715 } 3716 } 3717 } 3718 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3719 3720 /* Create the factorization matrix */ 3721 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3722 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3723 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3724 PetscCall(MatSetUp(B)); 3725 3726 PetscCall(PetscNew(&mumps)); 3727 3728 B->ops->view = MatView_MUMPS; 3729 B->ops->getinfo = MatGetInfo_MUMPS; 3730 3731 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3732 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3733 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3734 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3735 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3736 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3737 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3738 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3739 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3740 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3741 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3742 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3743 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3744 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3745 3746 if (ftype == MAT_FACTOR_LU) { 3747 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3748 B->factortype = MAT_FACTOR_LU; 3749 mumps->sym = 0; 3750 } else { 3751 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3752 B->factortype = MAT_FACTOR_CHOLESKY; 3753 #if defined(PETSC_USE_COMPLEX) 3754 mumps->sym = 2; 3755 #else 3756 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3757 else mumps->sym = 2; 3758 #endif 3759 } 3760 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3761 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3762 3763 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3764 if (size == 1) { 3765 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3766 B->canuseordering = PETSC_TRUE; 3767 } 3768 3769 /* set solvertype */ 3770 PetscCall(PetscFree(B->solvertype)); 3771 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3772 B->ops->destroy = MatDestroy_MUMPS; 3773 B->data = (void *)mumps; 3774 3775 *F = B; 3776 mumps->id.job = JOB_NULL; 3777 mumps->ICNTL_pre = NULL; 3778 mumps->CNTL_pre = NULL; 3779 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3780 PetscFunctionReturn(PETSC_SUCCESS); 3781 } 3782 3783 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3784 { 3785 PetscFunctionBegin; 3786 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3787 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3788 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3789 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3790 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3791 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3792 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3793 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3794 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3795 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3796 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3797 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3798 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3799 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3800 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 3801 PetscFunctionReturn(PETSC_SUCCESS); 3802 } 3803