1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_NULL 0 27 #define JOB_FACTSYMBOLIC 1 28 #define JOB_FACTNUMERIC 2 29 #define JOB_SOLVE 3 30 #define JOB_END -2 31 32 /* calls to MUMPS */ 33 #if defined(PETSC_USE_COMPLEX) 34 #if defined(PETSC_USE_REAL_SINGLE) 35 #define MUMPS_c cmumps_c 36 #else 37 #define MUMPS_c zmumps_c 38 #endif 39 #else 40 #if defined(PETSC_USE_REAL_SINGLE) 41 #define MUMPS_c smumps_c 42 #else 43 #define MUMPS_c dmumps_c 44 #endif 45 #endif 46 47 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 48 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 49 naming convention in PetscMPIInt, PetscBLASInt etc. 50 */ 51 typedef MUMPS_INT PetscMUMPSInt; 52 53 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 54 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 55 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 56 #endif 57 #else 58 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 59 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 60 #endif 61 #endif 62 63 #define MPIU_MUMPSINT MPI_INT 64 #define PETSC_MUMPS_INT_MAX 2147483647 65 #define PETSC_MUMPS_INT_MIN -2147483648 66 67 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 68 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 69 { 70 PetscFunctionBegin; 71 #if PetscDefined(USE_64BIT_INDICES) 72 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 73 #endif 74 *b = (PetscMUMPSInt)(a); 75 PetscFunctionReturn(0); 76 } 77 78 /* Put these utility routines here since they are only used in this file */ 79 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 80 { 81 PetscInt myval; 82 PetscBool myset; 83 PetscFunctionBegin; 84 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 85 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 86 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 87 if (set) *set = myset; 88 PetscFunctionReturn(0); 89 } 90 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 91 92 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. 
Before/after the call, we change/restore CPUs the master ranks can run on */ 93 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 94 #define PetscMUMPS_c(mumps) \ 95 do { \ 96 if (mumps->use_petsc_omp_support) { \ 97 if (mumps->is_omp_master) { \ 98 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 99 MUMPS_c(&mumps->id); \ 100 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 101 } \ 102 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 103 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 104 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 105 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 106 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 107 */ \ 108 PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 109 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \ 110 PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 111 } else { \ 112 MUMPS_c(&mumps->id); \ 113 } \ 114 } while (0) 115 #else 116 #define PetscMUMPS_c(mumps) \ 117 do { \ 118 MUMPS_c(&mumps->id); \ 119 } while (0) 120 #endif 121 122 /* declare MumpsScalar */ 123 #if defined(PETSC_USE_COMPLEX) 124 #if defined(PETSC_USE_REAL_SINGLE) 125 #define MumpsScalar mumps_complex 126 #else 127 #define MumpsScalar mumps_double_complex 128 #endif 129 #else 130 #define MumpsScalar PetscScalar 131 #endif 132 133 /* macros s.t. indices match MUMPS documentation */ 134 #define ICNTL(I) icntl[(I)-1] 135 #define CNTL(I) cntl[(I)-1] 136 #define INFOG(I) infog[(I)-1] 137 #define INFO(I) info[(I)-1] 138 #define RINFOG(I) rinfog[(I)-1] 139 #define RINFO(I) rinfo[(I)-1] 140 141 typedef struct Mat_MUMPS Mat_MUMPS; 142 struct Mat_MUMPS { 143 #if defined(PETSC_USE_COMPLEX) 144 #if defined(PETSC_USE_REAL_SINGLE) 145 CMUMPS_STRUC_C id; 146 #else 147 ZMUMPS_STRUC_C id; 148 #endif 149 #else 150 #if defined(PETSC_USE_REAL_SINGLE) 151 SMUMPS_STRUC_C id; 152 #else 153 DMUMPS_STRUC_C id; 154 #endif 155 #endif 156 157 MatStructure matstruc; 158 PetscMPIInt myid, petsc_size; 159 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 160 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 161 PetscInt64 nnz; /* number of nonzeros. 
The type is called selective 64-bit in mumps */ 162 PetscMUMPSInt sym; 163 MPI_Comm mumps_comm; 164 PetscMUMPSInt *ICNTL_pre; 165 PetscReal *CNTL_pre; 166 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 167 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 168 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 169 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 170 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 171 PetscInt *rhs_nrow, max_nrhs; 172 PetscMPIInt *rhs_recvcounts, *rhs_disps; 173 PetscScalar *rhs_loc, *rhs_recvbuf; 174 #endif 175 Vec b_seq, x_seq; 176 PetscInt ninfo, *info; /* which INFO to display */ 177 PetscInt sizeredrhs; 178 PetscScalar *schur_sol; 179 PetscInt schur_sizesol; 180 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 181 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 182 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 183 184 /* stuff used by petsc/mumps OpenMP support*/ 185 PetscBool use_petsc_omp_support; 186 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 187 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 188 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 189 PetscMPIInt tag, omp_comm_size; 190 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 191 MPI_Request *reqs; 192 }; 193 194 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 195 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 196 */ 197 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 198 { 199 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 200 201 PetscFunctionBegin; 202 #if defined(PETSC_USE_64BIT_INDICES) 203 { 204 PetscInt i; 205 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 206 PetscCall(PetscFree(mumps->ia_alloc)); 207 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 208 mumps->cur_ilen = nrow + 1; 209 } 210 if (nnz > mumps->cur_jlen) { 211 PetscCall(PetscFree(mumps->ja_alloc)); 212 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 213 mumps->cur_jlen = nnz; 214 } 215 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 216 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 217 *ia_mumps = mumps->ia_alloc; 218 *ja_mumps = mumps->ja_alloc; 219 } 220 #else 221 *ia_mumps = ia; 222 *ja_mumps = ja; 223 #endif 224 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 225 PetscFunctionReturn(0); 226 } 227 228 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 229 { 230 PetscFunctionBegin; 231 PetscCall(PetscFree(mumps->id.listvar_schur)); 232 PetscCall(PetscFree(mumps->id.redrhs)); 233 PetscCall(PetscFree(mumps->schur_sol)); 234 mumps->id.size_schur = 0; 235 mumps->id.schur_lld = 0; 236 mumps->id.ICNTL(19) = 0; 237 PetscFunctionReturn(0); 238 } 239 240 /* solve with rhs in mumps->id.redrhs and return in the same location */ 241 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 242 { 243 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 244 Mat S, B, X; 245 MatFactorSchurStatus schurstatus; 246 PetscInt sizesol; 247 248 PetscFunctionBegin; 249 PetscCall(MatFactorFactorizeSchurComplement(F)); 250 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 251 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 252 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 253 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 254 PetscCall(MatBindToCPU(B, S->boundtocpu)); 255 #endif 256 switch (schurstatus) { 257 case MAT_FACTOR_SCHUR_FACTORED: 258 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 259 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 260 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 261 PetscCall(MatBindToCPU(X, S->boundtocpu)); 262 #endif 263 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 264 PetscCall(MatMatSolveTranspose(S, B, X)); 265 } else { 266 PetscCall(MatMatSolve(S, B, X)); 267 } 268 break; 269 case MAT_FACTOR_SCHUR_INVERTED: 270 sizesol = mumps->id.nrhs * mumps->id.size_schur; 271 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 272 PetscCall(PetscFree(mumps->schur_sol)); 273 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 274 mumps->schur_sizesol = sizesol; 275 } 276 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 277 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 278 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 279 PetscCall(MatBindToCPU(X, S->boundtocpu)); 280 #endif 281 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 282 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 283 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 284 } else { 285 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 286 } 287 PetscCall(MatProductSetFromOptions(X)); 288 PetscCall(MatProductSymbolic(X)); 289 PetscCall(MatProductNumeric(X)); 290 291 PetscCall(MatCopy(X, B, 
SAME_NONZERO_PATTERN)); 292 break; 293 default: 294 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 295 } 296 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 297 PetscCall(MatDestroy(&B)); 298 PetscCall(MatDestroy(&X)); 299 PetscFunctionReturn(0); 300 } 301 302 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 303 { 304 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 305 306 PetscFunctionBegin; 307 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 308 PetscFunctionReturn(0); 309 } 310 if (!expansion) { /* prepare for the condensation step */ 311 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 312 /* allocate MUMPS internal array to store reduced right-hand sides */ 313 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 314 PetscCall(PetscFree(mumps->id.redrhs)); 315 mumps->id.lredrhs = mumps->id.size_schur; 316 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 317 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 318 } 319 mumps->id.ICNTL(26) = 1; /* condensation phase */ 320 } else { /* prepare for the expansion step */ 321 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 322 PetscCall(MatMumpsSolveSchur_Private(F)); 323 mumps->id.ICNTL(26) = 2; /* expansion phase */ 324 PetscMUMPS_c(mumps); 325 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 326 /* restore defaults */ 327 mumps->id.ICNTL(26) = -1; 328 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 329 if (mumps->id.nrhs > 1) { 330 PetscCall(PetscFree(mumps->id.redrhs)); 331 mumps->id.lredrhs = 0; 332 mumps->sizeredrhs = 0; 333 } 334 } 335 PetscFunctionReturn(0); 336 } 337 338 /* 339 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 340 341 input: 342 A - matrix in aij,baij or sbaij format 343 shift - 0: C style output triple; 1: Fortran style output triple. 344 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 345 MAT_REUSE_MATRIX: only the values in v array are updated 346 output: 347 nnz - dim of r, c, and v (number of local nonzero entries of A) 348 r, c, v - row and col index, matrix values (matrix triples) 349 350 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 351 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 352 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
353 354 */ 355 356 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 357 { 358 const PetscScalar *av; 359 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 360 PetscInt64 nz, rnz, i, j, k; 361 PetscMUMPSInt *row, *col; 362 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 363 364 PetscFunctionBegin; 365 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 366 mumps->val = (PetscScalar *)av; 367 if (reuse == MAT_INITIAL_MATRIX) { 368 nz = aa->nz; 369 ai = aa->i; 370 aj = aa->j; 371 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 372 for (i = k = 0; i < M; i++) { 373 rnz = ai[i + 1] - ai[i]; 374 ajj = aj + ai[i]; 375 for (j = 0; j < rnz; j++) { 376 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 377 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 378 k++; 379 } 380 } 381 mumps->irn = row; 382 mumps->jcn = col; 383 mumps->nnz = nz; 384 } 385 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 386 PetscFunctionReturn(0); 387 } 388 389 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 390 { 391 PetscInt64 nz, i, j, k, r; 392 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 393 PetscMUMPSInt *row, *col; 394 395 PetscFunctionBegin; 396 mumps->val = a->val; 397 if (reuse == MAT_INITIAL_MATRIX) { 398 nz = a->sliidx[a->totalslices]; 399 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 400 for (i = k = 0; i < a->totalslices; i++) { 401 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 402 } 403 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 404 mumps->irn = row; 405 mumps->jcn = col; 406 mumps->nnz = nz; 407 } 408 PetscFunctionReturn(0); 409 } 410 411 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 412 { 413 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 414 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 415 PetscInt64 M, nz, idx = 0, rnz, i, j, k, m; 416 PetscInt bs; 417 PetscMUMPSInt *row, *col; 418 419 PetscFunctionBegin; 420 PetscCall(MatGetBlockSize(A, &bs)); 421 M = A->rmap->N / bs; 422 mumps->val = aa->a; 423 if (reuse == MAT_INITIAL_MATRIX) { 424 ai = aa->i; 425 aj = aa->j; 426 nz = bs2 * aa->nz; 427 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 428 for (i = 0; i < M; i++) { 429 ajj = aj + ai[i]; 430 rnz = ai[i + 1] - ai[i]; 431 for (k = 0; k < rnz; k++) { 432 for (j = 0; j < bs; j++) { 433 for (m = 0; m < bs; m++) { 434 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 435 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 436 idx++; 437 } 438 } 439 } 440 } 441 mumps->irn = row; 442 mumps->jcn = col; 443 mumps->nnz = nz; 444 } 445 PetscFunctionReturn(0); 446 } 447 448 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 449 { 450 const PetscInt *ai, *aj, *ajj; 451 PetscInt bs; 452 PetscInt64 nz, rnz, i, j, k, m; 453 PetscMUMPSInt *row, *col; 454 PetscScalar *val; 455 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 456 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 457 #if defined(PETSC_USE_COMPLEX) 458 PetscBool isset, hermitian; 459 #endif 460 461 PetscFunctionBegin; 462 #if defined(PETSC_USE_COMPLEX) 463 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 464 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 465 #endif 466 ai = aa->i; 467 aj = aa->j; 468 
PetscCall(MatGetBlockSize(A, &bs)); 469 if (reuse == MAT_INITIAL_MATRIX) { 470 nz = aa->nz; 471 PetscCall(PetscMalloc2(bs2 * nz, &row, bs2 * nz, &col)); 472 if (bs > 1) { 473 PetscCall(PetscMalloc1(bs2 * nz, &mumps->val_alloc)); 474 mumps->val = mumps->val_alloc; 475 } else { 476 mumps->val = aa->a; 477 } 478 mumps->irn = row; 479 mumps->jcn = col; 480 } else { 481 if (bs == 1) mumps->val = aa->a; 482 row = mumps->irn; 483 col = mumps->jcn; 484 } 485 val = mumps->val; 486 487 nz = 0; 488 if (bs > 1) { 489 for (i = 0; i < mbs; i++) { 490 rnz = ai[i + 1] - ai[i]; 491 ajj = aj + ai[i]; 492 for (j = 0; j < rnz; j++) { 493 for (k = 0; k < bs; k++) { 494 for (m = 0; m < bs; m++) { 495 if (ajj[j] > i || k >= m) { 496 if (reuse == MAT_INITIAL_MATRIX) { 497 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 498 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 499 } 500 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 501 } 502 } 503 } 504 } 505 } 506 } else if (reuse == MAT_INITIAL_MATRIX) { 507 for (i = 0; i < mbs; i++) { 508 rnz = ai[i + 1] - ai[i]; 509 ajj = aj + ai[i]; 510 for (j = 0; j < rnz; j++) { 511 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 512 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 513 nz++; 514 } 515 } 516 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 517 } 518 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 519 PetscFunctionReturn(0); 520 } 521 522 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 523 { 524 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 525 PetscInt64 nz, rnz, i, j; 526 const PetscScalar *av, *v1; 527 PetscScalar *val; 528 PetscMUMPSInt *row, *col; 529 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 530 PetscBool missing; 531 #if defined(PETSC_USE_COMPLEX) 532 PetscBool hermitian, isset; 533 #endif 534 535 PetscFunctionBegin; 536 #if defined(PETSC_USE_COMPLEX) 537 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 538 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 539 #endif 540 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 541 ai = aa->i; 542 aj = aa->j; 543 adiag = aa->diag; 544 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 545 if (reuse == MAT_INITIAL_MATRIX) { 546 /* count nz in the upper triangular part of A */ 547 nz = 0; 548 if (missing) { 549 for (i = 0; i < M; i++) { 550 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 551 for (j = ai[i]; j < ai[i + 1]; j++) { 552 if (aj[j] < i) continue; 553 nz++; 554 } 555 } else { 556 nz += ai[i + 1] - adiag[i]; 557 } 558 } 559 } else { 560 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 561 } 562 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 563 PetscCall(PetscMalloc1(nz, &val)); 564 mumps->nnz = nz; 565 mumps->irn = row; 566 mumps->jcn = col; 567 mumps->val = mumps->val_alloc = val; 568 569 nz = 0; 570 if (missing) { 571 for (i = 0; i < M; i++) { 572 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 573 for (j = ai[i]; j < ai[i + 1]; j++) { 574 if (aj[j] < i) continue; 575 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 576 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 577 val[nz] = av[j]; 578 nz++; 579 } 580 } else { 581 rnz = ai[i + 1] - adiag[i]; 582 ajj = aj + adiag[i]; 583 v1 = av + adiag[i]; 584 for (j = 0; j < rnz; j++) { 585 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 586 
PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 587 val[nz++] = v1[j]; 588 } 589 } 590 } 591 } else { 592 for (i = 0; i < M; i++) { 593 rnz = ai[i + 1] - adiag[i]; 594 ajj = aj + adiag[i]; 595 v1 = av + adiag[i]; 596 for (j = 0; j < rnz; j++) { 597 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 598 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 599 val[nz++] = v1[j]; 600 } 601 } 602 } 603 } else { 604 nz = 0; 605 val = mumps->val; 606 if (missing) { 607 for (i = 0; i < M; i++) { 608 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 609 for (j = ai[i]; j < ai[i + 1]; j++) { 610 if (aj[j] < i) continue; 611 val[nz++] = av[j]; 612 } 613 } else { 614 rnz = ai[i + 1] - adiag[i]; 615 v1 = av + adiag[i]; 616 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 617 } 618 } 619 } else { 620 for (i = 0; i < M; i++) { 621 rnz = ai[i + 1] - adiag[i]; 622 v1 = av + adiag[i]; 623 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 624 } 625 } 626 } 627 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 628 PetscFunctionReturn(0); 629 } 630 631 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 632 { 633 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 634 PetscInt bs; 635 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 636 PetscMUMPSInt *row, *col; 637 const PetscScalar *av, *bv, *v1, *v2; 638 PetscScalar *val; 639 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 640 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 641 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 642 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 643 #if defined(PETSC_USE_COMPLEX) 644 PetscBool hermitian, isset; 645 #endif 646 647 PetscFunctionBegin; 648 #if defined(PETSC_USE_COMPLEX) 649 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 650 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 651 #endif 652 PetscCall(MatGetBlockSize(A, &bs)); 653 rstart = A->rmap->rstart; 654 ai = aa->i; 655 aj = aa->j; 656 bi = bb->i; 657 bj = bb->j; 658 av = aa->a; 659 bv = bb->a; 660 661 garray = mat->garray; 662 663 if (reuse == MAT_INITIAL_MATRIX) { 664 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 665 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 666 PetscCall(PetscMalloc1(nz, &val)); 667 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 668 mumps->irn = row; 669 mumps->jcn = col; 670 mumps->val = mumps->val_alloc = val; 671 } else { 672 val = mumps->val; 673 } 674 675 jj = 0; 676 irow = rstart; 677 for (i = 0; i < mbs; i++) { 678 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 679 countA = ai[i + 1] - ai[i]; 680 countB = bi[i + 1] - bi[i]; 681 bjj = bj + bi[i]; 682 v1 = av + ai[i] * bs2; 683 v2 = bv + bi[i] * bs2; 684 685 if (bs > 1) { 686 /* A-part */ 687 for (j = 0; j < countA; j++) { 688 for (k = 0; k < bs; k++) { 689 for (m = 0; m < bs; m++) { 690 if (rstart + ajj[j] * bs > irow || k >= m) { 691 if (reuse == MAT_INITIAL_MATRIX) { 692 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 693 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 694 } 695 val[jj++] = v1[j * bs2 + m + k * bs]; 696 } 697 } 698 } 699 } 700 701 /* B-part */ 702 for (j = 0; j < countB; j++) { 703 for (k = 0; k < bs; k++) { 704 for (m = 0; m < bs; m++) { 705 if (reuse == MAT_INITIAL_MATRIX) { 706 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 707 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, 
&col[jj])); 708 } 709 val[jj++] = v2[j * bs2 + m + k * bs]; 710 } 711 } 712 } 713 } else { 714 /* A-part */ 715 for (j = 0; j < countA; j++) { 716 if (reuse == MAT_INITIAL_MATRIX) { 717 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 718 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 719 } 720 val[jj++] = v1[j]; 721 } 722 723 /* B-part */ 724 for (j = 0; j < countB; j++) { 725 if (reuse == MAT_INITIAL_MATRIX) { 726 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 727 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 728 } 729 val[jj++] = v2[j]; 730 } 731 } 732 irow += bs; 733 } 734 mumps->nnz = jj; 735 PetscFunctionReturn(0); 736 } 737 738 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 739 { 740 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 741 PetscInt64 rstart, nz, i, j, jj, irow, countA, countB; 742 PetscMUMPSInt *row, *col; 743 const PetscScalar *av, *bv, *v1, *v2; 744 PetscScalar *val; 745 Mat Ad, Ao; 746 Mat_SeqAIJ *aa; 747 Mat_SeqAIJ *bb; 748 749 PetscFunctionBegin; 750 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 751 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 752 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 753 754 aa = (Mat_SeqAIJ *)(Ad)->data; 755 bb = (Mat_SeqAIJ *)(Ao)->data; 756 ai = aa->i; 757 aj = aa->j; 758 bi = bb->i; 759 bj = bb->j; 760 761 rstart = A->rmap->rstart; 762 763 if (reuse == MAT_INITIAL_MATRIX) { 764 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 765 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 766 PetscCall(PetscMalloc1(nz, &val)); 767 mumps->nnz = nz; 768 mumps->irn = row; 769 mumps->jcn = col; 770 mumps->val = mumps->val_alloc = val; 771 } else { 772 val = mumps->val; 773 } 774 775 jj = 0; 776 irow = rstart; 777 for (i = 0; i < m; i++) { 778 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 779 countA = ai[i + 1] - ai[i]; 780 countB = bi[i + 1] - bi[i]; 781 bjj = bj + bi[i]; 782 v1 = av + ai[i]; 783 v2 = bv + bi[i]; 784 785 /* A-part */ 786 for (j = 0; j < countA; j++) { 787 if (reuse == MAT_INITIAL_MATRIX) { 788 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 789 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 790 } 791 val[jj++] = v1[j]; 792 } 793 794 /* B-part */ 795 for (j = 0; j < countB; j++) { 796 if (reuse == MAT_INITIAL_MATRIX) { 797 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 798 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 799 } 800 val[jj++] = v2[j]; 801 } 802 irow++; 803 } 804 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 805 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 806 PetscFunctionReturn(0); 807 } 808 809 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 810 { 811 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 812 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 813 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 814 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 815 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart; 816 const PetscInt bs2 = mat->bs2; 817 PetscInt bs; 818 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 819 PetscMUMPSInt *row, *col; 820 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 821 PetscScalar *val; 822 823 PetscFunctionBegin; 824 PetscCall(MatGetBlockSize(A, &bs)); 825 if (reuse == MAT_INITIAL_MATRIX) { 826 nz = bs2 * (aa->nz + bb->nz); 827 
PetscCall(PetscMalloc2(nz, &row, nz, &col)); 828 PetscCall(PetscMalloc1(nz, &val)); 829 mumps->nnz = nz; 830 mumps->irn = row; 831 mumps->jcn = col; 832 mumps->val = mumps->val_alloc = val; 833 } else { 834 val = mumps->val; 835 } 836 837 jj = 0; 838 irow = rstart; 839 for (i = 0; i < mbs; i++) { 840 countA = ai[i + 1] - ai[i]; 841 countB = bi[i + 1] - bi[i]; 842 ajj = aj + ai[i]; 843 bjj = bj + bi[i]; 844 v1 = av + bs2 * ai[i]; 845 v2 = bv + bs2 * bi[i]; 846 847 idx = 0; 848 /* A-part */ 849 for (k = 0; k < countA; k++) { 850 for (j = 0; j < bs; j++) { 851 for (n = 0; n < bs; n++) { 852 if (reuse == MAT_INITIAL_MATRIX) { 853 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 854 PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj])); 855 } 856 val[jj++] = v1[idx++]; 857 } 858 } 859 } 860 861 idx = 0; 862 /* B-part */ 863 for (k = 0; k < countB; k++) { 864 for (j = 0; j < bs; j++) { 865 for (n = 0; n < bs; n++) { 866 if (reuse == MAT_INITIAL_MATRIX) { 867 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 868 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 869 } 870 val[jj++] = v2[idx++]; 871 } 872 } 873 } 874 irow += bs; 875 } 876 PetscFunctionReturn(0); 877 } 878 879 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 880 { 881 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 882 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 883 PetscMUMPSInt *row, *col; 884 const PetscScalar *av, *bv, *v1, *v2; 885 PetscScalar *val; 886 Mat Ad, Ao; 887 Mat_SeqAIJ *aa; 888 Mat_SeqAIJ *bb; 889 #if defined(PETSC_USE_COMPLEX) 890 PetscBool hermitian, isset; 891 #endif 892 893 PetscFunctionBegin; 894 #if defined(PETSC_USE_COMPLEX) 895 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 896 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 897 #endif 898 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 899 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 900 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 901 902 aa = (Mat_SeqAIJ *)(Ad)->data; 903 bb = (Mat_SeqAIJ *)(Ao)->data; 904 ai = aa->i; 905 aj = aa->j; 906 adiag = aa->diag; 907 bi = bb->i; 908 bj = bb->j; 909 910 rstart = A->rmap->rstart; 911 912 if (reuse == MAT_INITIAL_MATRIX) { 913 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 914 nzb = 0; /* num of upper triangular entries in mat->B */ 915 for (i = 0; i < m; i++) { 916 nza += (ai[i + 1] - adiag[i]); 917 countB = bi[i + 1] - bi[i]; 918 bjj = bj + bi[i]; 919 for (j = 0; j < countB; j++) { 920 if (garray[bjj[j]] > rstart) nzb++; 921 } 922 } 923 924 nz = nza + nzb; /* total nz of upper triangular part of mat */ 925 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 926 PetscCall(PetscMalloc1(nz, &val)); 927 mumps->nnz = nz; 928 mumps->irn = row; 929 mumps->jcn = col; 930 mumps->val = mumps->val_alloc = val; 931 } else { 932 val = mumps->val; 933 } 934 935 jj = 0; 936 irow = rstart; 937 for (i = 0; i < m; i++) { 938 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 939 v1 = av + adiag[i]; 940 countA = ai[i + 1] - adiag[i]; 941 countB = bi[i + 1] - bi[i]; 942 bjj = bj + bi[i]; 943 v2 = bv + bi[i]; 944 945 /* A-part */ 946 for (j = 0; j < countA; j++) { 947 if (reuse == MAT_INITIAL_MATRIX) { 948 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 949 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + 
shift, &col[jj])); 950 } 951 val[jj++] = v1[j]; 952 } 953 954 /* B-part */ 955 for (j = 0; j < countB; j++) { 956 if (garray[bjj[j]] > rstart) { 957 if (reuse == MAT_INITIAL_MATRIX) { 958 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 959 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 960 } 961 val[jj++] = v2[j]; 962 } 963 } 964 irow++; 965 } 966 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 967 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 968 PetscFunctionReturn(0); 969 } 970 971 PetscErrorCode MatDestroy_MUMPS(Mat A) 972 { 973 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 974 975 PetscFunctionBegin; 976 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 977 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 978 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 979 PetscCall(VecDestroy(&mumps->b_seq)); 980 PetscCall(VecDestroy(&mumps->x_seq)); 981 PetscCall(PetscFree(mumps->id.perm_in)); 982 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 983 PetscCall(PetscFree(mumps->val_alloc)); 984 PetscCall(PetscFree(mumps->info)); 985 PetscCall(PetscFree(mumps->ICNTL_pre)); 986 PetscCall(PetscFree(mumps->CNTL_pre)); 987 PetscCall(MatMumpsResetSchur_Private(mumps)); 988 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 989 mumps->id.job = JOB_END; 990 PetscMUMPS_c(mumps); 991 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 992 if (mumps->mumps_comm != MPI_COMM_NULL) { 993 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 994 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 995 } 996 } 997 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 998 if (mumps->use_petsc_omp_support) { 999 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1000 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1001 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1002 } 1003 #endif 1004 PetscCall(PetscFree(mumps->ia_alloc)); 1005 PetscCall(PetscFree(mumps->ja_alloc)); 1006 PetscCall(PetscFree(mumps->recvcount)); 1007 PetscCall(PetscFree(mumps->reqs)); 1008 PetscCall(PetscFree(mumps->irhs_loc)); 1009 PetscCall(PetscFree(A->data)); 1010 1011 /* clear composed functions */ 1012 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1013 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1014 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1015 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1016 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1017 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1018 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1019 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1020 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1021 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1022 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1023 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1024 PetscCall(PetscObjectComposeFunction((PetscObject)A, 
"MatMumpsGetInverseTranspose_C", NULL)); 1025 PetscFunctionReturn(0); 1026 } 1027 1028 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1029 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1030 { 1031 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1032 const PetscMPIInt ompsize = mumps->omp_comm_size; 1033 PetscInt i, m, M, rstart; 1034 1035 PetscFunctionBegin; 1036 PetscCall(MatGetSize(A, &M, NULL)); 1037 PetscCall(MatGetLocalSize(A, &m, NULL)); 1038 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1039 if (ompsize == 1) { 1040 if (!mumps->irhs_loc) { 1041 mumps->nloc_rhs = m; 1042 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1043 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1044 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1045 } 1046 mumps->id.rhs_loc = (MumpsScalar *)array; 1047 } else { 1048 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1049 const PetscInt *ranges; 1050 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1051 MPI_Group petsc_group, omp_group; 1052 PetscScalar *recvbuf = NULL; 1053 1054 if (mumps->is_omp_master) { 1055 /* Lazily initialize the omp stuff for distributed rhs */ 1056 if (!mumps->irhs_loc) { 1057 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1058 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1059 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1060 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1061 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1062 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1063 1064 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1065 mumps->nloc_rhs = 0; 1066 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1067 for (j = 0; j < ompsize; j++) { 1068 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1069 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1070 } 1071 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1072 for (j = k = 0; j < ompsize; j++) { 1073 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1074 } 1075 1076 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1077 PetscCallMPI(MPI_Group_free(&petsc_group)); 1078 PetscCallMPI(MPI_Group_free(&omp_group)); 1079 } 1080 1081 /* Realloc buffers when current nrhs is bigger than what we have met */ 1082 if (nrhs > mumps->max_nrhs) { 1083 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1084 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1085 mumps->max_nrhs = nrhs; 1086 } 1087 1088 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1089 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1090 mumps->rhs_disps[0] = 0; 1091 for (j = 1; j < ompsize; j++) { 1092 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1093 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1094 } 1095 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. 
Single rhs is common in Ax=b */ 1096 } 1097 1098 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1099 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1100 1101 if (mumps->is_omp_master) { 1102 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1103 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1104 for (j = 0; j < ompsize; j++) { 1105 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1106 dst = dstbase; 1107 for (i = 0; i < nrhs; i++) { 1108 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1109 src += mumps->rhs_nrow[j]; 1110 dst += mumps->nloc_rhs; 1111 } 1112 dstbase += mumps->rhs_nrow[j]; 1113 } 1114 } 1115 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1116 } 1117 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1118 } 1119 mumps->id.nrhs = nrhs; 1120 mumps->id.nloc_rhs = mumps->nloc_rhs; 1121 mumps->id.lrhs_loc = mumps->nloc_rhs; 1122 mumps->id.irhs_loc = mumps->irhs_loc; 1123 PetscFunctionReturn(0); 1124 } 1125 1126 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1127 { 1128 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1129 const PetscScalar *rarray = NULL; 1130 PetscScalar *array; 1131 IS is_iden, is_petsc; 1132 PetscInt i; 1133 PetscBool second_solve = PETSC_FALSE; 1134 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1135 1136 PetscFunctionBegin; 1137 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1138 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1139 &cite1)); 1140 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1141 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1142 &cite2)); 1143 1144 if (A->factorerrortype) { 1145 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1146 PetscCall(VecSetInf(x)); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 mumps->id.nrhs = 1; 1151 if (mumps->petsc_size > 1) { 1152 if (mumps->ICNTL20 == 10) { 1153 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1154 PetscCall(VecGetArrayRead(b, &rarray)); 1155 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1156 } else { 1157 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1158 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1159 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1160 if (!mumps->myid) { 1161 PetscCall(VecGetArray(mumps->b_seq, &array)); 1162 mumps->id.rhs = (MumpsScalar *)array; 1163 } 1164 } 1165 } else { /* petsc_size == 1 */ 1166 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1167 PetscCall(VecCopy(b, x)); 1168 PetscCall(VecGetArray(x, &array)); 1169 mumps->id.rhs = (MumpsScalar *)array; 1170 } 1171 1172 /* 1173 handle condensation step of Schur complement (if any) 1174 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 
1175 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1176 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1177 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1178 */ 1179 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1180 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1181 second_solve = PETSC_TRUE; 1182 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1183 } 1184 /* solve phase */ 1185 /*-------------*/ 1186 mumps->id.job = JOB_SOLVE; 1187 PetscMUMPS_c(mumps); 1188 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1189 1190 /* handle expansion step of Schur complement (if any) */ 1191 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1192 1193 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1194 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1195 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1196 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1197 } 1198 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1199 PetscInt *isol2_loc = NULL; 1200 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1201 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1202 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1203 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1204 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1205 PetscCall(ISDestroy(&is_iden)); 1206 PetscCall(ISDestroy(&is_petsc)); 1207 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1208 } 1209 1210 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1211 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1212 } 1213 1214 if (mumps->petsc_size > 1) { 1215 if (mumps->ICNTL20 == 10) { 1216 PetscCall(VecRestoreArrayRead(b, &rarray)); 1217 } else if (!mumps->myid) { 1218 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1219 } 1220 } else PetscCall(VecRestoreArray(x, &array)); 1221 1222 PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3))); 1223 PetscFunctionReturn(0); 1224 } 1225 1226 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1227 { 1228 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1229 1230 PetscFunctionBegin; 1231 mumps->id.ICNTL(9) = 0; 1232 PetscCall(MatSolve_MUMPS(A, b, x)); 1233 mumps->id.ICNTL(9) = 1; 1234 PetscFunctionReturn(0); 1235 } 1236 1237 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1238 { 1239 Mat Bt = NULL; 1240 PetscBool denseX, denseB, flg, flgT; 1241 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1242 PetscInt i, nrhs, M; 1243 PetscScalar *array; 1244 const PetscScalar *rbray; 1245 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1246 PetscMUMPSInt *isol_loc, *isol_loc_save; 1247 PetscScalar *bray, *sol_loc, *sol_loc_save; 1248 IS is_to, is_from; 1249 PetscInt k, proc, j, m, myrstart; 1250 const PetscInt *rstart; 1251 Vec v_mpi, msol_loc; 1252 VecScatter 
scat_sol; 1253 Vec b_seq; 1254 VecScatter scat_rhs; 1255 PetscScalar *aa; 1256 PetscInt spnr, *ia, *ja; 1257 Mat_MPIAIJ *b = NULL; 1258 1259 PetscFunctionBegin; 1260 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1261 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1262 1263 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1264 if (denseB) { 1265 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1266 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1267 } else { /* sparse B */ 1268 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1269 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1270 if (flgT) { /* input B is transpose of actural RHS matrix, 1271 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1272 PetscCall(MatTransposeGetMat(B, &Bt)); 1273 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1274 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1275 } 1276 1277 PetscCall(MatGetSize(B, &M, &nrhs)); 1278 mumps->id.nrhs = nrhs; 1279 mumps->id.lrhs = M; 1280 mumps->id.rhs = NULL; 1281 1282 if (mumps->petsc_size == 1) { 1283 PetscScalar *aa; 1284 PetscInt spnr, *ia, *ja; 1285 PetscBool second_solve = PETSC_FALSE; 1286 1287 PetscCall(MatDenseGetArray(X, &array)); 1288 mumps->id.rhs = (MumpsScalar *)array; 1289 1290 if (denseB) { 1291 /* copy B to X */ 1292 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1293 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1294 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1295 } else { /* sparse B */ 1296 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1297 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1298 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1299 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1300 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1301 } 1302 /* handle condensation step of Schur complement (if any) */ 1303 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1304 second_solve = PETSC_TRUE; 1305 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1306 } 1307 /* solve phase */ 1308 /*-------------*/ 1309 mumps->id.job = JOB_SOLVE; 1310 PetscMUMPS_c(mumps); 1311 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1312 1313 /* handle expansion step of Schur complement (if any) */ 1314 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1315 if (!denseB) { /* sparse B */ 1316 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1317 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1318 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1319 } 1320 PetscCall(MatDenseRestoreArray(X, &array)); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! 
--------*/ 1325 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1326 1327 /* create msol_loc to hold mumps local solution */ 1328 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1329 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1330 1331 lsol_loc = mumps->id.lsol_loc; 1332 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1333 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1334 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1335 mumps->id.isol_loc = isol_loc; 1336 1337 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1338 1339 if (denseB) { 1340 if (mumps->ICNTL20 == 10) { 1341 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1342 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1343 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1344 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1345 PetscCall(MatGetLocalSize(B, &m, NULL)); 1346 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1347 } else { 1348 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1349 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1350 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1351 0, re-arrange B into desired order, which is a local operation. 1352 */ 1353 1354 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1355 /* wrap dense rhs matrix B into a vector v_mpi */ 1356 PetscCall(MatGetLocalSize(B, &m, NULL)); 1357 PetscCall(MatDenseGetArray(B, &bray)); 1358 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1359 PetscCall(MatDenseRestoreArray(B, &bray)); 1360 1361 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1362 if (!mumps->myid) { 1363 PetscInt *idx; 1364 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1365 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1366 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1367 k = 0; 1368 for (proc = 0; proc < mumps->petsc_size; proc++) { 1369 for (j = 0; j < nrhs; j++) { 1370 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1371 } 1372 } 1373 1374 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1375 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1376 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1377 } else { 1378 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1379 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1380 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1381 } 1382 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1383 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1384 PetscCall(ISDestroy(&is_to)); 1385 PetscCall(ISDestroy(&is_from)); 1386 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1387 1388 if (!mumps->myid) { /* define rhs on the host */ 1389 PetscCall(VecGetArray(b_seq, &bray)); 1390 mumps->id.rhs = (MumpsScalar *)bray; 1391 PetscCall(VecRestoreArray(b_seq, &bray)); 1392 } 1393 } 1394 } else { /* sparse B */ 1395 b = (Mat_MPIAIJ *)Bt->data; 1396 1397 /* wrap dense X into a vector v_mpi */ 1398 PetscCall(MatGetLocalSize(X, &m, NULL)); 1399 PetscCall(MatDenseGetArray(X, &bray)); 1400 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1401 PetscCall(MatDenseRestoreArray(X, &bray)); 1402 1403 if (!mumps->myid) { 1404 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1405 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1406 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1407 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1408 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1409 } else { 1410 mumps->id.irhs_ptr = NULL; 1411 mumps->id.irhs_sparse = NULL; 1412 mumps->id.nz_rhs = 0; 1413 mumps->id.rhs_sparse = NULL; 1414 } 1415 } 1416 1417 /* solve phase */ 1418 /*-------------*/ 1419 mumps->id.job = JOB_SOLVE; 1420 PetscMUMPS_c(mumps); 1421 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1422 1423 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1424 PetscCall(MatDenseGetArray(X, &array)); 1425 PetscCall(VecPlaceArray(v_mpi, array)); 1426 1427 /* create scatter scat_sol */ 1428 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1429 /* iidx: index for scatter mumps solution to petsc X */ 1430 1431 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1432 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1433 for (i = 0; i < lsol_loc; i++) { 1434 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1435 1436 for (proc = 0; proc < mumps->petsc_size; proc++) { 1437 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1438 myrstart = rstart[proc]; 1439 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1440 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1441 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1442 break; 1443 } 1444 } 1445 1446 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1447 } 1448 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1449 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1450 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1451 PetscCall(ISDestroy(&is_from)); 1452 PetscCall(ISDestroy(&is_to)); 1453 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1454 PetscCall(MatDenseRestoreArray(X, &array)); 1455 1456 /* free spaces */ 1457 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1458 mumps->id.isol_loc = isol_loc_save; 1459 1460 PetscCall(PetscFree2(sol_loc, isol_loc)); 1461 PetscCall(PetscFree(idxx)); 1462 PetscCall(VecDestroy(&msol_loc)); 1463 PetscCall(VecDestroy(&v_mpi)); 1464 if (!denseB) { 1465 if (!mumps->myid) { 1466 b = (Mat_MPIAIJ *)Bt->data; 1467 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1468 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1469 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1470 } 1471 } else { 1472 if (mumps->ICNTL20 == 0) { 1473 PetscCall(VecDestroy(&b_seq)); 1474 PetscCall(VecScatterDestroy(&scat_rhs)); 1475 } 1476 } 1477 PetscCall(VecScatterDestroy(&scat_sol)); 1478 PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3))); 1479 PetscFunctionReturn(0); 1480 } 1481 1482 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1483 { 1484 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1485 PetscMUMPSInt oldvalue = mumps->id.ICNTL(9); 1486 1487 PetscFunctionBegin; 1488 mumps->id.ICNTL(9) = 0; 1489 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1490 mumps->id.ICNTL(9) = oldvalue; 1491 PetscFunctionReturn(0); 1492 } 1493 1494 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1495 { 1496 PetscBool flg; 1497 Mat B; 1498 1499 PetscFunctionBegin; 1500 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1501 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1502 1503 /* Create B=Bt^T that uses Bt's data structure */ 1504 PetscCall(MatCreateTranspose(Bt, &B)); 1505 1506 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1507 PetscCall(MatDestroy(&B)); 1508 PetscFunctionReturn(0); 1509 } 1510 1511 #if !defined(PETSC_USE_COMPLEX) 1512 /* 1513 input: 1514 F: numeric factor 1515 output: 1516 nneg: total number of negative pivots 1517 nzero: total number of zero pivots 1518 npos: (global dimension of F) - nneg - nzero 1519 */ 1520 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1521 { 1522 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1523 PetscMPIInt size; 1524 1525 PetscFunctionBegin; 1526 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1527 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense 
matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1528 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13)); 1529 1530 if (nneg) *nneg = mumps->id.INFOG(12); 1531 if (nzero || npos) { 1532 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1533 if (nzero) *nzero = mumps->id.INFOG(28); 1534 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1535 } 1536 PetscFunctionReturn(0); 1537 } 1538 #endif 1539 1540 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1541 { 1542 PetscInt i, nreqs; 1543 PetscMUMPSInt *irn, *jcn; 1544 PetscMPIInt count; 1545 PetscInt64 totnnz, remain; 1546 const PetscInt osize = mumps->omp_comm_size; 1547 PetscScalar *val; 1548 1549 PetscFunctionBegin; 1550 if (osize > 1) { 1551 if (reuse == MAT_INITIAL_MATRIX) { 1552 /* master first gathers counts of nonzeros to receive */ 1553 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1554 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1555 1556 /* Then each computes number of send/recvs */ 1557 if (mumps->is_omp_master) { 1558 /* Start from 1 since self communication is not done in MPI */ 1559 nreqs = 0; 1560 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1561 } else { 1562 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1563 } 1564 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1565 1566 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1567 MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz 1568 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1569 is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
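For example (hypothetical sizes), a rank with nnz = 5,000,000,000 would send each of irn/jcn/val in three chunks of 2,147,483,647, 2,147,483,647 and 705,032,706 entries, matching the (nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX request count computed above.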
1570 */ 1571 nreqs = 0; /* counter for actual send/recvs */ 1572 if (mumps->is_omp_master) { 1573 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1574 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1575 PetscCall(PetscMalloc1(totnnz, &val)); 1576 1577 /* Self communication */ 1578 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1579 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1580 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1581 1582 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1583 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1584 PetscCall(PetscFree(mumps->val_alloc)); 1585 mumps->nnz = totnnz; 1586 mumps->irn = irn; 1587 mumps->jcn = jcn; 1588 mumps->val = mumps->val_alloc = val; 1589 1590 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1591 jcn += mumps->recvcount[0]; 1592 val += mumps->recvcount[0]; 1593 1594 /* Remote communication */ 1595 for (i = 1; i < osize; i++) { 1596 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1597 remain = mumps->recvcount[i] - count; 1598 while (count > 0) { 1599 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1600 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1601 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1602 irn += count; 1603 jcn += count; 1604 val += count; 1605 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1606 remain -= count; 1607 } 1608 } 1609 } else { 1610 irn = mumps->irn; 1611 jcn = mumps->jcn; 1612 val = mumps->val; 1613 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1614 remain = mumps->nnz - count; 1615 while (count > 0) { 1616 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1617 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1618 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1619 irn += count; 1620 jcn += count; 1621 val += count; 1622 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1623 remain -= count; 1624 } 1625 } 1626 } else { 1627 nreqs = 0; 1628 if (mumps->is_omp_master) { 1629 val = mumps->val + mumps->recvcount[0]; 1630 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1631 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1632 remain = mumps->recvcount[i] - count; 1633 while (count > 0) { 1634 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1635 val += count; 1636 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1637 remain -= count; 1638 } 1639 } 1640 } else { 1641 val = mumps->val; 1642 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1643 remain = mumps->nnz - count; 1644 while (count > 0) { 1645 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1646 val += count; 1647 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1648 remain -= count; 1649 } 1650 } 1651 } 1652 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1653 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1654 } 1655 PetscFunctionReturn(0); 1656 } 1657 1658 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo 
*info) 1659 { 1660 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1661 PetscBool isMPIAIJ; 1662 1663 PetscFunctionBegin; 1664 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1)=-16 with INFO(2)=0 is an empty matrix, not an error */ 1665 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1666 PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1667 PetscFunctionReturn(0); 1668 } 1669 1670 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1671 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1672 1673 /* numerical factorization phase */ 1674 /*-------------------------------*/ 1675 mumps->id.job = JOB_FACTNUMERIC; 1676 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1677 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 1678 } else { 1679 mumps->id.a_loc = (MumpsScalar *)mumps->val; 1680 } 1681 PetscMUMPS_c(mumps); 1682 if (mumps->id.INFOG(1) < 0) { 1683 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 1684 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1685 PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1686 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1687 } else if (mumps->id.INFOG(1) == -13) { 1688 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1689 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1690 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1691 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1692 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1693 } else { 1694 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1695 F->factorerrortype = MAT_FACTOR_OTHER; 1696 } 1697 } 1698 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " mumps->id.ICNTL(16):=%d", mumps->id.ICNTL(16)); 1699 1700 F->assembled = PETSC_TRUE; 1701 1702 if (F->schur) { /* reset Schur status to unfactored */ 1703 #if defined(PETSC_HAVE_CUDA) 1704 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1705 #endif 1706 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1707 mumps->id.ICNTL(19) = 2; 1708 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1709 } 1710 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1711 } 1712 1713 /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */ 1714 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1715 1716 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1717 if (mumps->petsc_size > 1) { 1718 PetscInt lsol_loc; 1719 PetscScalar *sol_loc; 1720 1721 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 1722 1723 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1724 if (mumps->x_seq) { 1725
PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1726 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1727 PetscCall(VecDestroy(&mumps->x_seq)); 1728 } 1729 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1730 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1731 mumps->id.lsol_loc = lsol_loc; 1732 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1733 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1734 } 1735 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 /* Sets MUMPS options from the options database */ 1740 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1741 { 1742 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1743 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1744 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1745 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1746 MumpsScalar *arr; 1747 1748 PetscFunctionBegin; 1749 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1750 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1751 PetscInt nthreads = 0; 1752 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1753 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1754 1755 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1756 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1757 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1758 1759 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1760 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1761 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1762 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1763 if (mumps->use_petsc_omp_support) { 1764 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 1765 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1766 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1767 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1768 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1769 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1770 #endif 1771 } else { 1772 mumps->omp_comm = PETSC_COMM_SELF; 1773 mumps->mumps_comm = mumps->petsc_comm; 1774 mumps->is_omp_master = PETSC_TRUE; 1775 } 1776 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1777 mumps->reqs = NULL; 1778 mumps->tag = 0; 1779 1780 if (mumps->mumps_comm != MPI_COMM_NULL) { 1781 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1782 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 1783 MPI_Comm comm; 1784 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 1785 mumps->mumps_comm = comm; 1786 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 1787 } 1788 1789 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1790 mumps->id.job = JOB_INIT; 1791 mumps->id.par = 1; /* host participates in factorization and solve */ 1792 mumps->id.sym = mumps->sym; 1793 1794 size = mumps->id.size_schur; 1795 arr = mumps->id.schur; 1796 listvar_schur = mumps->id.listvar_schur; 1797 PetscMUMPS_c(mumps); 1798 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1799 /* restore cached ICNTL and CNTL values */ 1800 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 1801 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 1802 PetscCall(PetscFree(mumps->ICNTL_pre)); 1803 PetscCall(PetscFree(mumps->CNTL_pre)); 1804 1805 if (schur) { 1806 mumps->id.size_schur = size; 1807 mumps->id.schur_lld = size; 1808 mumps->id.schur = arr; 1809 mumps->id.listvar_schur = listvar_schur; 1810 if (mumps->petsc_size > 1) { 1811 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 1812 1813 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1814 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1815 PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 1816 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 1817 } else { 1818 if (F->factortype == MAT_FACTOR_LU) { 1819 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1820 } else { 1821 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1822 } 1823 } 1824 mumps->id.ICNTL(26) = -1; 1825 } 1826 1827 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1828 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1829 */ 1830 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 1831 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 1832 1833 mumps->scat_rhs = NULL; 1834 mumps->scat_sol = NULL; 1835 1836 /* set PETSc-MUMPS default options - override MUMPS default */ 1837 mumps->id.ICNTL(3) = 0; 1838 mumps->id.ICNTL(4) = 0; 1839 if (mumps->petsc_size == 1) { 1840 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1841 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1842 } else { 1843 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1844 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1845 } 1846 } 1847 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 1848 if (flg) mumps->id.ICNTL(1) = icntl; 1849 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 1850 if (flg) mumps->id.ICNTL(2) = icntl; 1851 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 1852 if (flg) mumps->id.ICNTL(3) = icntl; 1853 1854 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 1855 if (flg) mumps->id.ICNTL(4) = icntl; 1856 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1857 1858 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 1859 if (flg) mumps->id.ICNTL(6) = icntl; 1860 1861 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 1862 if (flg) { 1863 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1864 mumps->id.ICNTL(7) = icntl; 1865 } 1866 1867 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 1868 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 1869 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 1870 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 1871 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 1872 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 1873 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 1874 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 1875 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 1876 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 1877 if (flg) { 1878 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 1879 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 1880 } 1881 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 1882 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1883 PetscCall(MatDestroy(&F->schur)); 1884 PetscCall(MatMumpsResetSchur_Private(mumps)); 1885 } 1886 1887 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1888 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1889 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1890 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1891 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1892 In short, we could not use distributed RHS with MPICH until v4.0b1. 
1893 */ 1894 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 1895 mumps->ICNTL20 = 0; /* Centralized dense RHS */ 1896 #else 1897 mumps->ICNTL20 = 10; /* Distributed dense RHS */ 1898 #endif 1899 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give MUMPS centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 1900 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 1901 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 1902 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 1903 #endif 1904 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 1905 1906 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 1907 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 1908 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 1909 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ } 1910 1911 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 1912 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix has been computed", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 1913 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 1914 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 1915 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 1916 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 1917 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 1918 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 1919 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None",
mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 1920 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 1921 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 1922 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 1923 1924 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 1925 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 1926 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 1927 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 1928 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 1929 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 1930 1931 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 1932 1933 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 1934 if (ninfo) { 1935 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo); 1936 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 1937 mumps->ninfo = ninfo; 1938 for (i = 0; i < ninfo; i++) { 1939 PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]); 1940 mumps->info[i] = info[i]; 1941 } 1942 } 1943 PetscOptionsEnd(); 1944 PetscFunctionReturn(0); 1945 } 1946 1947 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 1948 { 1949 PetscFunctionBegin; 1950 if (mumps->id.INFOG(1) < 0) { 1951 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1952 if (mumps->id.INFOG(1) == -6) { 1953 PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1954 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1955 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1956 PetscCall(PetscInfo(F, "problem with workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1957 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1958 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* error -16 reports N in INFO(2); N=0 means an empty matrix */ 1959 PetscCall(PetscInfo(F, "Empty matrix\n")); 1960 } else { 1961 PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1962 F->factorerrortype = MAT_FACTOR_OTHER; 1963 } 1964 } 1965 PetscFunctionReturn(0); 1966 } 1967 1968 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const
MatFactorInfo *info) 1969 { 1970 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1971 Vec b; 1972 const PetscInt M = A->rmap->N; 1973 1974 PetscFunctionBegin; 1975 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1976 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1977 PetscFunctionReturn(0); 1978 } 1979 1980 /* Set MUMPS options from the options database */ 1981 PetscCall(MatSetFromOptions_MUMPS(F, A)); 1982 1983 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 1984 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 1985 1986 /* analysis phase */ 1987 /*----------------*/ 1988 mumps->id.job = JOB_FACTSYMBOLIC; 1989 mumps->id.n = M; 1990 switch (mumps->id.ICNTL(18)) { 1991 case 0: /* centralized assembled matrix input */ 1992 if (!mumps->myid) { 1993 mumps->id.nnz = mumps->nnz; 1994 mumps->id.irn = mumps->irn; 1995 mumps->id.jcn = mumps->jcn; 1996 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 1997 if (r) { 1998 mumps->id.ICNTL(7) = 1; 1999 if (!mumps->myid) { 2000 const PetscInt *idx; 2001 PetscInt i; 2002 2003 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2004 PetscCall(ISGetIndices(r, &idx)); 2005 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2006 PetscCall(ISRestoreIndices(r, &idx)); 2007 } 2008 } 2009 } 2010 break; 2011 case 3: /* distributed assembled matrix input (size>1) */ 2012 mumps->id.nnz_loc = mumps->nnz; 2013 mumps->id.irn_loc = mumps->irn; 2014 mumps->id.jcn_loc = mumps->jcn; 2015 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2016 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2017 PetscCall(MatCreateVecs(A, NULL, &b)); 2018 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2019 PetscCall(VecDestroy(&b)); 2020 } 2021 break; 2022 } 2023 PetscMUMPS_c(mumps); 2024 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2025 2026 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2027 F->ops->solve = MatSolve_MUMPS; 2028 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2029 F->ops->matsolve = MatMatSolve_MUMPS; 2030 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2031 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2032 2033 mumps->matstruc = SAME_NONZERO_PATTERN; 2034 PetscFunctionReturn(0); 2035 } 2036 2037 /* Note the Petsc r and c permutations are ignored */ 2038 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2039 { 2040 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2041 Vec b; 2042 const PetscInt M = A->rmap->N; 2043 2044 PetscFunctionBegin; 2045 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2046 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2047 PetscFunctionReturn(0); 2048 } 2049 2050 /* Set MUMPS options from the options database */ 2051 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2052 2053 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2054 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2055 2056 /* analysis phase */ 2057 /*----------------*/ 2058 mumps->id.job = JOB_FACTSYMBOLIC; 2059 mumps->id.n = M; 2060 switch (mumps->id.ICNTL(18)) { 2061 case 0: /* centralized assembled matrix input */ 2062 if (!mumps->myid) { 2063 mumps->id.nnz = mumps->nnz; 2064 mumps->id.irn = mumps->irn; 2065 mumps->id.jcn = mumps->jcn; 2066 if (mumps->id.ICNTL(6) > 1) 
mumps->id.a = (MumpsScalar *)mumps->val; 2067 } 2068 break; 2069 case 3: /* distributed assembled matrix input (size>1) */ 2070 mumps->id.nnz_loc = mumps->nnz; 2071 mumps->id.irn_loc = mumps->irn; 2072 mumps->id.jcn_loc = mumps->jcn; 2073 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2074 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2075 PetscCall(MatCreateVecs(A, NULL, &b)); 2076 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2077 PetscCall(VecDestroy(&b)); 2078 } 2079 break; 2080 } 2081 PetscMUMPS_c(mumps); 2082 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2083 2084 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2085 F->ops->solve = MatSolve_MUMPS; 2086 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2087 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2088 2089 mumps->matstruc = SAME_NONZERO_PATTERN; 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /* Note the Petsc r permutation and factor info are ignored */ 2094 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2095 { 2096 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2097 Vec b; 2098 const PetscInt M = A->rmap->N; 2099 2100 PetscFunctionBegin; 2101 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2102 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2103 PetscFunctionReturn(0); 2104 } 2105 2106 /* Set MUMPS options from the options database */ 2107 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2108 2109 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2110 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2111 2112 /* analysis phase */ 2113 /*----------------*/ 2114 mumps->id.job = JOB_FACTSYMBOLIC; 2115 mumps->id.n = M; 2116 switch (mumps->id.ICNTL(18)) { 2117 case 0: /* centralized assembled matrix input */ 2118 if (!mumps->myid) { 2119 mumps->id.nnz = mumps->nnz; 2120 mumps->id.irn = mumps->irn; 2121 mumps->id.jcn = mumps->jcn; 2122 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2123 } 2124 break; 2125 case 3: /* distributed assembled matrix input (size>1) */ 2126 mumps->id.nnz_loc = mumps->nnz; 2127 mumps->id.irn_loc = mumps->irn; 2128 mumps->id.jcn_loc = mumps->jcn; 2129 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2130 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2131 PetscCall(MatCreateVecs(A, NULL, &b)); 2132 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2133 PetscCall(VecDestroy(&b)); 2134 } 2135 break; 2136 } 2137 PetscMUMPS_c(mumps); 2138 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2139 2140 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2141 F->ops->solve = MatSolve_MUMPS; 2142 F->ops->solvetranspose = MatSolve_MUMPS; 2143 F->ops->matsolve = MatMatSolve_MUMPS; 2144 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2145 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2146 #if defined(PETSC_USE_COMPLEX) 2147 F->ops->getinertia = NULL; 2148 #else 2149 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2150 #endif 2151 2152 mumps->matstruc = SAME_NONZERO_PATTERN; 2153 PetscFunctionReturn(0); 2154 } 2155 2156 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2157 { 2158 PetscBool iascii; 2159 PetscViewerFormat format; 2160 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2161 2162 PetscFunctionBegin; 2163 /* check if matrix is mumps type */ 2164 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2165 2166 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2167 if (iascii) { 2168 PetscCall(PetscViewerGetFormat(viewer, &format)); 2169 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2170 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2171 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2172 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2173 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2174 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2175 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2176 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2177 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2178 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2179 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2180 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2181 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2182 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2183 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2184 if (mumps->id.ICNTL(11) > 0) { 2185 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2186 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2187 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2188 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2189 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2190 PetscCall(PetscViewerASCIIPrintf(viewer, " 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2191 } 2192 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2193 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2194 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2195 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2196 /* ICNTL(15-17) not used */ 2197 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2198 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2199 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2200 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2201 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2202 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2203 2204 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2205 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2206 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2207 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2208 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2209 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2210 2211 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2212 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2213 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2214 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2215 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2216 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2217 2218 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2219 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2220 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2221 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2222 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2223 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2224 2225 /* information local to 
each processor */ 2226 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2227 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2228 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2229 PetscCall(PetscViewerFlush(viewer)); 2230 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2231 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2232 PetscCall(PetscViewerFlush(viewer)); 2233 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2234 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2235 PetscCall(PetscViewerFlush(viewer)); 2236 2237 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2238 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2239 PetscCall(PetscViewerFlush(viewer)); 2240 2241 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2242 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2243 PetscCall(PetscViewerFlush(viewer)); 2244 2245 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2246 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2247 PetscCall(PetscViewerFlush(viewer)); 2248 2249 if (mumps->ninfo && mumps->ninfo <= 80) { 2250 PetscInt i; 2251 for (i = 0; i < mumps->ninfo; i++) { 2252 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2253 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2254 PetscCall(PetscViewerFlush(viewer)); 2255 } 2256 } 2257 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2258 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2259 2260 if (mumps->myid == 0) { /* information from the host */ 2261 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2262 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2263 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2264 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2265 2266 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2267 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2268 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2269 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2270 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2271 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2272 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2273 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2274 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2275 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2276 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2277 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2278 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2279 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2280 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2281 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2282 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2283 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 
2284 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2285 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2286 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2287 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2288 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2289 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2290 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2291 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2292 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2293 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2294 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2295 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2296 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2297 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2298 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2299 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2300 } 2301 } 2302 } 2303 PetscFunctionReturn(0); 2304 } 2305 2306 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2307 { 2308 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2309 2310 PetscFunctionBegin; 2311 info->block_size = 1.0; 2312 info->nz_allocated = mumps->id.INFOG(20); 2313 info->nz_used = mumps->id.INFOG(20); 2314 info->nz_unneeded = 0.0; 2315 info->assemblies = 0.0; 2316 info->mallocs = 0.0; 2317 info->memory = 0.0; 2318 info->fill_ratio_given = 0; 2319 info->fill_ratio_needed = 0; 2320 info->factor_mallocs = 0; 2321 PetscFunctionReturn(0); 2322 } 2323 2324 /* -------------------------------------------------------------------------------------------*/ 2325 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2326 { 2327 Mat_MUMPS *mumps = 
(Mat_MUMPS *)F->data; 2328 const PetscScalar *arr; 2329 const PetscInt *idxs; 2330 PetscInt size, i; 2331 2332 PetscFunctionBegin; 2333 PetscCall(ISGetLocalSize(is, &size)); 2334 /* Schur complement matrix */ 2335 PetscCall(MatDestroy(&F->schur)); 2336 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2337 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2338 mumps->id.schur = (MumpsScalar *)arr; 2339 mumps->id.size_schur = size; 2340 mumps->id.schur_lld = size; 2341 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2342 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2343 2344 /* MUMPS expects Fortran style indices */ 2345 PetscCall(PetscFree(mumps->id.listvar_schur)); 2346 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2347 PetscCall(ISGetIndices(is, &idxs)); 2348 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2349 PetscCall(ISRestoreIndices(is, &idxs)); 2350 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2351 mumps->id.ICNTL(26) = -1; 2352 PetscFunctionReturn(0); 2353 } 2354 2355 /* -------------------------------------------------------------------------------------------*/ 2356 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2357 { 2358 Mat St; 2359 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2360 PetscScalar *array; 2361 #if defined(PETSC_USE_COMPLEX) 2362 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2363 #endif 2364 2365 PetscFunctionBegin; 2366 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2367 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2368 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2369 PetscCall(MatSetType(St, MATDENSE)); 2370 PetscCall(MatSetUp(St)); 2371 PetscCall(MatDenseGetArray(St, &array)); 2372 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2373 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2374 PetscInt i, j, N = mumps->id.size_schur; 2375 for (i = 0; i < N; i++) { 2376 for (j = 0; j < N; j++) { 2377 #if !defined(PETSC_USE_COMPLEX) 2378 PetscScalar val = mumps->id.schur[i * N + j]; 2379 #else 2380 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2381 #endif 2382 array[j * N + i] = val; 2383 } 2384 } 2385 } else { /* stored by columns */ 2386 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2387 } 2388 } else { /* either full or lower-triangular (not packed) */ 2389 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2390 PetscInt i, j, N = mumps->id.size_schur; 2391 for (i = 0; i < N; i++) { 2392 for (j = i; j < N; j++) { 2393 #if !defined(PETSC_USE_COMPLEX) 2394 PetscScalar val = mumps->id.schur[i * N + j]; 2395 #else 2396 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2397 #endif 2398 array[i * N + j] = val; 2399 array[j * N + i] = val; 2400 } 2401 } 2402 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2403 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2404 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2405 PetscInt i, j, N = mumps->id.size_schur; 2406 for (i = 0; i < N; i++) { 2407 for (j = 0; j < i + 1; j++) { 2408 #if !defined(PETSC_USE_COMPLEX) 2409 PetscScalar val 
= mumps->id.schur[i * N + j]; 2410 #else 2411 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2412 #endif 2413 array[i * N + j] = val; 2414 array[j * N + i] = val; 2415 } 2416 } 2417 } 2418 } 2419 PetscCall(MatDenseRestoreArray(St, &array)); 2420 *S = St; 2421 PetscFunctionReturn(0); 2422 } 2423 2424 /* -------------------------------------------------------------------------------------------*/ 2425 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2426 { 2427 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2428 2429 PetscFunctionBegin; 2430 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2431 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2432 for (i = 0; i < nICNTL_pre; ++i) 2433 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2434 if (i == nICNTL_pre) { /* not already cached */ 2435 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2436 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2437 mumps->ICNTL_pre[0]++; 2438 } 2439 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2440 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2441 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2442 PetscFunctionReturn(0); 2443 } 2444 2445 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2446 { 2447 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2448 2449 PetscFunctionBegin; 2450 *ival = mumps->id.ICNTL(icntl); 2451 PetscFunctionReturn(0); 2452 } 2453 2454 /*@ 2455 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2456 2457 Logically Collective 2458 2459 Input Parameters: 2460 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2461 . icntl - index of MUMPS parameter array ICNTL() 2462 - ival - value of MUMPS ICNTL(icntl) 2463 2464 Options Database Key: 2465 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2466 2467 Level: beginner 2468 2469 References: 2470 . * - MUMPS Users' Guide 2471 2472 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2473 @*/ 2474 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2475 { 2476 PetscFunctionBegin; 2477 PetscValidType(F, 1); 2478 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2479 PetscValidLogicalCollectiveInt(F, icntl, 2); 2480 PetscValidLogicalCollectiveInt(F, ival, 3); 2481 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2482 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2483 PetscFunctionReturn(0); 2484 } 2485 2486 /*@ 2487 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2488 2489 Logically Collective 2490 2491 Input Parameters: 2492 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2493 - icntl - index of MUMPS parameter array ICNTL() 2494 2495 Output Parameter: 2496 . ival - value of MUMPS ICNTL(icntl) 2497 2498 Level: beginner 2499 2500 References: 2501 . 
* - MUMPS Users' Guide 2502 2503 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2504 @*/ 2505 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2506 { 2507 PetscFunctionBegin; 2508 PetscValidType(F, 1); 2509 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2510 PetscValidLogicalCollectiveInt(F, icntl, 2); 2511 PetscValidIntPointer(ival, 3); 2512 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2513 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2514 PetscFunctionReturn(0); 2515 } 2516 2517 /* -------------------------------------------------------------------------------------------*/ 2518 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2519 { 2520 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2521 2522 PetscFunctionBegin; 2523 if (mumps->id.job == JOB_NULL) { 2524 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2525 for (i = 0; i < nCNTL_pre; ++i) 2526 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2527 if (i == nCNTL_pre) { 2528 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2529 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2530 mumps->CNTL_pre[0]++; 2531 } 2532 mumps->CNTL_pre[1 + 2 * i] = icntl; 2533 mumps->CNTL_pre[2 + 2 * i] = val; 2534 } else mumps->id.CNTL(icntl) = val; 2535 PetscFunctionReturn(0); 2536 } 2537 2538 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2539 { 2540 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2541 2542 PetscFunctionBegin; 2543 *val = mumps->id.CNTL(icntl); 2544 PetscFunctionReturn(0); 2545 } 2546 2547 /*@ 2548 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2549 2550 Logically Collective 2551 2552 Input Parameters: 2553 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2554 . icntl - index of MUMPS parameter array CNTL() 2555 - val - value of MUMPS CNTL(icntl) 2556 2557 Options Database Key: 2558 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2559 2560 Level: beginner 2561 2562 References: 2563 . 
* - MUMPS Users' Guide 2564 2565 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2566 @*/ 2567 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2568 { 2569 PetscFunctionBegin; 2570 PetscValidType(F, 1); 2571 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2572 PetscValidLogicalCollectiveInt(F, icntl, 2); 2573 PetscValidLogicalCollectiveReal(F, val, 3); 2574 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2575 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2576 PetscFunctionReturn(0); 2577 } 2578 2579 /*@ 2580 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2581 2582 Logically Collective 2583 2584 Input Parameters: 2585 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2586 - icntl - index of MUMPS parameter array CNTL() 2587 2588 Output Parameter: 2589 . val - value of MUMPS CNTL(icntl) 2590 2591 Level: beginner 2592 2593 References: 2594 . * - MUMPS Users' Guide 2595 2596 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2597 @*/ 2598 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2599 { 2600 PetscFunctionBegin; 2601 PetscValidType(F, 1); 2602 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2603 PetscValidLogicalCollectiveInt(F, icntl, 2); 2604 PetscValidRealPointer(val, 3); 2605 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2606 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2607 PetscFunctionReturn(0); 2608 } 2609 2610 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2611 { 2612 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2613 2614 PetscFunctionBegin; 2615 *info = mumps->id.INFO(icntl); 2616 PetscFunctionReturn(0); 2617 } 2618 2619 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2620 { 2621 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2622 2623 PetscFunctionBegin; 2624 *infog = mumps->id.INFOG(icntl); 2625 PetscFunctionReturn(0); 2626 } 2627 2628 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2629 { 2630 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2631 2632 PetscFunctionBegin; 2633 *rinfo = mumps->id.RINFO(icntl); 2634 PetscFunctionReturn(0); 2635 } 2636 2637 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2638 { 2639 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2640 2641 PetscFunctionBegin; 2642 *rinfog = mumps->id.RINFOG(icntl); 2643 PetscFunctionReturn(0); 2644 } 2645 2646 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2647 { 2648 Mat Bt = NULL, Btseq = NULL; 2649 PetscBool flg; 2650 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2651 PetscScalar *aa; 2652 PetscInt spnr, *ia, *ja, M, nrhs; 2653 2654 PetscFunctionBegin; 2655 PetscValidPointer(spRHS, 2); 2656 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2657 if (flg) { 2658 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2659 } else 
SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2660 2661 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2662 2663 if (mumps->petsc_size > 1) { 2664 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2665 Btseq = b->A; 2666 } else { 2667 Btseq = Bt; 2668 } 2669 2670 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2671 mumps->id.nrhs = nrhs; 2672 mumps->id.lrhs = M; 2673 mumps->id.rhs = NULL; 2674 2675 if (!mumps->myid) { 2676 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2677 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2678 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2679 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2680 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2681 } else { 2682 mumps->id.irhs_ptr = NULL; 2683 mumps->id.irhs_sparse = NULL; 2684 mumps->id.nz_rhs = 0; 2685 mumps->id.rhs_sparse = NULL; 2686 } 2687 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2688 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2689 2690 /* solve phase */ 2691 /*-------------*/ 2692 mumps->id.job = JOB_SOLVE; 2693 PetscMUMPS_c(mumps); 2694 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 2695 2696 if (!mumps->myid) { 2697 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2698 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2699 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2700 } 2701 PetscFunctionReturn(0); 2702 } 2703 2704 /*@ 2705 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2706 2707 Logically Collective 2708 2709 Input Parameters: 2710 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2711 - spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format holding specified indices in processor[0] 2712 2713 Output Parameter: 2714 . spRHS - requested entries of inverse of A 2715 2716 Level: beginner 2717 2718 References: 2719 . * - MUMPS Users' Guide 2720 2721 .seealso: `MatGetFactor()`, `MatCreateTranspose()` 2722 @*/ 2723 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2724 { 2725 PetscFunctionBegin; 2726 PetscValidType(F, 1); 2727 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2728 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2729 PetscFunctionReturn(0); 2730 } 2731 2732 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2733 { 2734 Mat spRHS; 2735 2736 PetscFunctionBegin; 2737 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 2738 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 2739 PetscCall(MatDestroy(&spRHS)); 2740 PetscFunctionReturn(0); 2741 } 2742 2743 /*@ 2744 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2745 2746 Logically Collective 2747 2748 Input Parameters: 2749 + F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2750 - spRHST - sequential sparse matrix in `MATAIJ` format holding specified indices of A^T in processor[0] 2751 2752 Output Parameter: 2753 . 
spRHST - requested entries of inverse of A^T 2754 2755 Level: beginner 2756 2757 References: 2758 . * - MUMPS Users' Guide 2759 2760 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2761 @*/ 2762 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 2763 { 2764 PetscBool flg; 2765 2766 PetscFunctionBegin; 2767 PetscValidType(F, 1); 2768 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2769 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 2770 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 2771 2772 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 2773 PetscFunctionReturn(0); 2774 } 2775 2776 /*@ 2777 MatMumpsGetInfo - Get MUMPS parameter INFO() 2778 2779 Logically Collective 2780 2781 Input Parameters: 2782 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2783 - icntl - index of MUMPS parameter array INFO() 2784 2785 Output Parameter: 2786 . ival - value of MUMPS INFO(icntl) 2787 2788 Level: beginner 2789 2790 References: 2791 . * - MUMPS Users' Guide 2792 2793 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2794 @*/ 2795 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 2796 { 2797 PetscFunctionBegin; 2798 PetscValidType(F, 1); 2799 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2800 PetscValidIntPointer(ival, 3); 2801 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2802 PetscFunctionReturn(0); 2803 } 2804 2805 /*@ 2806 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2807 2808 Logically Collective 2809 2810 Input Parameters: 2811 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2812 - icntl - index of MUMPS parameter array INFOG() 2813 2814 Output Parameter: 2815 . ival - value of MUMPS INFOG(icntl) 2816 2817 Level: beginner 2818 2819 References: 2820 . * - MUMPS Users' Guide 2821 2822 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2823 @*/ 2824 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 2825 { 2826 PetscFunctionBegin; 2827 PetscValidType(F, 1); 2828 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2829 PetscValidIntPointer(ival, 3); 2830 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2831 PetscFunctionReturn(0); 2832 } 2833 2834 /*@ 2835 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2836 2837 Logically Collective 2838 2839 Input Parameters: 2840 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2841 - icntl - index of MUMPS parameter array RINFO() 2842 2843 Output Parameter: 2844 . val - value of MUMPS RINFO(icntl) 2845 2846 Level: beginner 2847 2848 References: 2849 . 
* - MUMPS Users' Guide

.seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidRealPointer(val, 3);
  PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetRinfog - Get MUMPS parameter RINFOG()

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array RINFOG()

  Output Parameter:
. val - value of MUMPS RINFOG(icntl)

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidRealPointer(val, 3);
  PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(0);
}

/*MC
  MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
  distributed and sequential matrices via the external package MUMPS.

  Works with `MATAIJ` and `MATSBAIJ` matrices; `MATBAIJ` and `MATSEQSELL` matrices are supported for LU factorization only.

  Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

  Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.

  Use -pc_type cholesky or -pc_type lu together with -pc_factor_mat_solver_type mumps to use this direct solver

  Options Database Keys:
+ -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
. -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
. -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
. -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
. -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
. -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto.
                       Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
. -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
. -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
. -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
. -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
. -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
. -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
. -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
. -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
. -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
. -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
. -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
. -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
. -mat_mumps_icntl_25 - ICNTL(25): computes a solution of a deficient matrix and a null space basis
. -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
. -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
. -mat_mumps_icntl_29 - ICNTL(29): parallel ordering, 1 = ptscotch, 2 = parmetis
. -mat_mumps_icntl_30 - ICNTL(30): compute a user-specified set of entries in inv(A)
. -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
. -mat_mumps_icntl_33 - ICNTL(33): compute determinant
. -mat_mumps_icntl_35 - ICNTL(35): level of activation of the BLR (Block Low-Rank) feature
. -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
. -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
. -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
. -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
. -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
. -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
. -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
. -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
- -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
                                   The default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

  Level: beginner

  Notes:
  MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix generates an error.

  When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
  `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

  When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about the failure by calling
$          KSPGetPC(ksp,&pc);
$          PCFactorGetMatrix(pc,&mat);
$          MatMumpsGetInfo(mat,....);
$          MatMumpsGetInfog(mat,....); etc.
  Alternatively, run with -ksp_error_if_not_converged and the program will stop with the MUMPS information printed in the error message.
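  If the factored matrix is created directly with `MatGetFactor()` (outside of a `KSP`/`PC`), the MUMPS control parameters can also be set programmatically.
  A minimal sketch follows; the matrix A, the vectors b and x, and the declarations of F, isrow, iscol and info are illustrative assumptions, and the
  choice of ICNTL(7)=5 (Metis) is only an example:
$          MatGetOrdering(A,MATORDERINGEXTERNAL,&isrow,&iscol);
$          MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
$          MatMumpsSetIcntl(F,7,5);             /* ask MUMPS to use its Metis ordering */
$          MatFactorInfoInitialize(&info);
$          MatLUFactorSymbolic(F,A,isrow,iscol,&info);
$          MatLUFactorNumeric(F,A,&info);
$          MatSolve(F,b,x);
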
  Using MUMPS with 64-bit integers
  MUMPS provides 64-bit integer support in two build modes:

  Full 64-bit: MUMPS is built with the C preprocessing flag -DINTSIZE64 and the Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
  requires all dependent libraries (MPI, ScaLAPACK, LAPACK and BLAS) to be built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

  Selective 64-bit: with the default MUMPS build, 64-bit integers are used only where needed. In the compressed sparse row (CSR) storage of matrices,
  MUMPS stores column indices in 32 bits but row offsets in 64 bits, so you can have a huge number of nonzeros but must have fewer than 2^31 rows and
  columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS build. It requires a regular (32-bit
  integer) build of all dependent libraries (MPI, ScaLAPACK, LAPACK and BLAS).

  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both the --with-64-bit-indices=0 and =1 variants of PETSc.

  Two modes to run MUMPS/PETSc with OpenMP
$     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
$     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
  (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
  (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
  libraries, to really get performance you should have a multithreaded BLAS library such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
  (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

  If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
  processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
  size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
  are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slave ranks (these assignments
  might come from CPU binding policies in job scripts) and make them available to the master rank so that the OpenMP threads spawned by MUMPS can run on those CPUs.
  On a multi-socket compute node, MPI rank mapping is an issue. Continuing the example above, suppose each compute node has two sockets. If MPI ranks are
  interleaved across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bound to cores, then with -mat_mumps_use_omp_threads 16 a master rank
  (and the threads it spawns) will use half of the cores on socket 0 and half on socket 1, which definitely hurts locality.
On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2992 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2993 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2994 examine the mapping result. 2995 2996 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 2997 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 2998 calls `omp_set_num_threads`(m) internally before calling MUMPS. 2999 3000 References: 3001 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3002 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 3003 3004 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3005 M*/ 3006 3007 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3008 { 3009 PetscFunctionBegin; 3010 *type = MATSOLVERMUMPS; 3011 PetscFunctionReturn(0); 3012 } 3013 3014 /* MatGetFactor for Seq and MPI AIJ matrices */ 3015 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3016 { 3017 Mat B; 3018 Mat_MUMPS *mumps; 3019 PetscBool isSeqAIJ; 3020 PetscMPIInt size; 3021 3022 PetscFunctionBegin; 3023 #if defined(PETSC_USE_COMPLEX) 3024 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3025 #endif 3026 /* Create the factorization matrix */ 3027 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3028 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3029 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3030 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3031 PetscCall(MatSetUp(B)); 3032 3033 PetscCall(PetscNew(&mumps)); 3034 3035 B->ops->view = MatView_MUMPS; 3036 B->ops->getinfo = MatGetInfo_MUMPS; 3037 3038 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3039 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3040 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3041 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3042 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3043 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3044 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3045 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3046 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3047 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3048 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3049 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3050 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3051 3052 if (ftype == MAT_FACTOR_LU) { 3053 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3054 B->factortype = MAT_FACTOR_LU; 3055 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3056 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3057 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3058 mumps->sym = 0; 3059 } else { 3060 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3061 B->factortype = MAT_FACTOR_CHOLESKY; 3062 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3063 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3064 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3065 #if defined(PETSC_USE_COMPLEX) 3066 mumps->sym = 2; 3067 #else 3068 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3069 else mumps->sym = 2; 3070 #endif 3071 } 3072 3073 /* set solvertype */ 3074 PetscCall(PetscFree(B->solvertype)); 3075 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3076 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3077 if (size == 1) { 3078 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3079 B->canuseordering = PETSC_TRUE; 3080 } 3081 B->ops->destroy = MatDestroy_MUMPS; 3082 B->data = (void *)mumps; 3083 3084 *F = B; 3085 mumps->id.job = JOB_NULL; 3086 mumps->ICNTL_pre = NULL; 3087 mumps->CNTL_pre = NULL; 3088 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3089 PetscFunctionReturn(0); 3090 } 3091 3092 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3093 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3094 { 3095 Mat B; 3096 Mat_MUMPS *mumps; 3097 PetscBool isSeqSBAIJ; 3098 PetscMPIInt size; 3099 3100 PetscFunctionBegin; 3101 #if defined(PETSC_USE_COMPLEX) 3102 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3103 #endif 3104 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3105 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3106 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3107 PetscCall(MatSetUp(B)); 3108 3109 PetscCall(PetscNew(&mumps)); 3110 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3111 if (isSeqSBAIJ) { 3112 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3113 } else { 3114 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3115 } 3116 3117 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3118 B->ops->view = MatView_MUMPS; 3119 B->ops->getinfo = MatGetInfo_MUMPS; 3120 3121 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3122 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3123 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3124 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3125 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3126 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3127 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3128 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3129 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3130 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3131 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3132 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3133 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3134 3135 B->factortype = MAT_FACTOR_CHOLESKY; 3136 #if defined(PETSC_USE_COMPLEX) 3137 mumps->sym = 2; 3138 #else 3139 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3140 else mumps->sym = 2; 3141 #endif 3142 3143 /* set solvertype */ 3144 PetscCall(PetscFree(B->solvertype)); 3145 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3146 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3147 if (size == 1) { 3148 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3149 B->canuseordering = PETSC_TRUE; 3150 } 3151 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3152 B->ops->destroy = MatDestroy_MUMPS; 3153 B->data = (void *)mumps; 3154 3155 *F = B; 3156 mumps->id.job = JOB_NULL; 3157 mumps->ICNTL_pre = NULL; 3158 mumps->CNTL_pre = NULL; 3159 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3160 PetscFunctionReturn(0); 3161 } 3162 3163 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3164 { 3165 Mat B; 3166 Mat_MUMPS *mumps; 3167 PetscBool isSeqBAIJ; 3168 PetscMPIInt size; 3169 3170 PetscFunctionBegin; 3171 /* Create the factorization matrix */ 3172 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3173 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3174 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3175 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3176 PetscCall(MatSetUp(B)); 3177 3178 PetscCall(PetscNew(&mumps)); 3179 if (ftype == MAT_FACTOR_LU) { 3180 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3181 B->factortype = MAT_FACTOR_LU; 3182 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3183 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3184 mumps->sym = 0; 3185 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3186 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3187 3188 B->ops->view = MatView_MUMPS; 3189 B->ops->getinfo = MatGetInfo_MUMPS; 3190 3191 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3192 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3193 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3195 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3196 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3197 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3198 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3199 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3200 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3201 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3202 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3203 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3204 3205 /* set solvertype */ 3206 PetscCall(PetscFree(B->solvertype)); 3207 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3208 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3209 if (size == 1) { 3210 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3211 B->canuseordering = PETSC_TRUE; 3212 } 3213 B->ops->destroy = MatDestroy_MUMPS; 3214 B->data = (void *)mumps; 3215 3216 *F = B; 3217 mumps->id.job = JOB_NULL; 3218 mumps->ICNTL_pre = NULL; 3219 mumps->CNTL_pre = NULL; 3220 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3221 PetscFunctionReturn(0); 3222 } 3223 3224 /* MatGetFactor for Seq and MPI SELL matrices */ 3225 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3226 { 3227 Mat B; 3228 Mat_MUMPS *mumps; 3229 PetscBool isSeqSELL; 3230 PetscMPIInt size; 3231 3232 PetscFunctionBegin; 3233 /* Create the factorization matrix */ 3234 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3235 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3236 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3237 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3238 PetscCall(MatSetUp(B)); 3239 3240 PetscCall(PetscNew(&mumps)); 3241 3242 B->ops->view = MatView_MUMPS; 3243 B->ops->getinfo = MatGetInfo_MUMPS; 3244 3245 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3246 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3247 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3248 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3249 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3250 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3251 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3252 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3253 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3254 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3255 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3256 3257 if (ftype == MAT_FACTOR_LU) { 3258 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3259 B->factortype = MAT_FACTOR_LU; 3260 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3261 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3262 mumps->sym = 0; 3263 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3264 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3265 3266 /* set solvertype */ 3267 PetscCall(PetscFree(B->solvertype)); 3268 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3269 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3270 if (size == 1) { 3271 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3272 B->canuseordering = PETSC_TRUE; 3273 } 3274 B->ops->destroy = MatDestroy_MUMPS; 3275 B->data = (void *)mumps; 3276 3277 *F = B; 3278 mumps->id.job = JOB_NULL; 3279 mumps->ICNTL_pre = NULL; 3280 mumps->CNTL_pre = NULL; 3281 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3282 PetscFunctionReturn(0); 3283 } 3284 3285 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3286 { 3287 PetscFunctionBegin; 3288 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3289 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3290 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3291 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3292 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3293 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3294 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3295 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3296 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3297 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3298 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3299 PetscFunctionReturn(0); 3300 } 3301
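
/*
   Usage sketch (illustrative only, not compiled as part of this file): the Schur complement support implemented
   above (MatFactorSetSchurIS_MUMPS and MatFactorCreateSchurComplement_MUMPS) is typically driven through the
   public PETSc interface as shown below. The matrix A, the index set "is" selecting the Schur variables, the
   permutations isrow/iscol, and the declarations of F, S, info and status are assumptions for this example.

     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
     MatFactorSetSchurIS(F, is);                     // select the Schur variables; must be called before the symbolic factorization
     MatGetOrdering(A, MATORDERINGEXTERNAL, &isrow, &iscol);
     MatLUFactorSymbolic(F, A, isrow, iscol, &info);
     MatLUFactorNumeric(F, A, &info);
     MatFactorCreateSchurComplement(F, &S, &status); // S is a sequential dense copy of the Schur complement
*/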