
/*
  Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc, and uses int64_t for
   numbers of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, therefore the macro is more reliable */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/column indices or other small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;
  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

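/*
   A sketch of how the two helpers above are used throughout this file (illustrative only, not compiled
   anywhere; the variable names are made up). Inside a PetscOptionsBegin()/PetscOptionsEnd() block one may write

     PetscMUMPSInt nrow, icntl = 7;
     PetscBool     set;
     PetscCall(PetscMUMPSIntCast(A->rmap->n, &nrow));   // errors on 64-bit-index builds if the value overflows a MUMPS_INT
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): matrix ordering", "None", icntl, &icntl, &set));

   On 32-bit-index builds the cast reduces to a plain assignment.
*/
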
/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
        MUMPS_c(&mumps->id); \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
      } \
      PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
      /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
         to each process, so we only Bcast info[1], an error code, and leave the others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See the MUMPS 5.1.2 manual, p. 82. \
         omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
      */ \
      PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { \
    MUMPS_c(&mumps->id); \
  } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]

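/*
   For example (illustrative only), with the macros above the 1-based control numbers used in the MUMPS
   documentation can be written verbatim on the C struct:

     mumps->id.ICNTL(24) = 1;   // expands to mumps->id.icntl[23] = 1, i.e. enable null-pivot detection

   so the C code reads like the Fortran-style numbering of the MUMPS manual.
*/
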
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to MUMPS */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer, hence val_alloc. */
  PetscInt64     nnz;             /* number of nonzeros. The type is what MUMPS calls selective 64-bit */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen, cur_jlen;  /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller; with it, blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is the PETSc matrix's communicator */
  PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm? */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] holds the column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices.
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 200 201 PetscFunctionBegin; 202 #if defined(PETSC_USE_64BIT_INDICES) 203 { 204 PetscInt i; 205 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 206 PetscCall(PetscFree(mumps->ia_alloc)); 207 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 208 mumps->cur_ilen = nrow + 1; 209 } 210 if (nnz > mumps->cur_jlen) { 211 PetscCall(PetscFree(mumps->ja_alloc)); 212 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 213 mumps->cur_jlen = nnz; 214 } 215 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 216 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 217 *ia_mumps = mumps->ia_alloc; 218 *ja_mumps = mumps->ja_alloc; 219 } 220 #else 221 *ia_mumps = ia; 222 *ja_mumps = ja; 223 #endif 224 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 225 PetscFunctionReturn(0); 226 } 227 228 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 229 { 230 PetscFunctionBegin; 231 PetscCall(PetscFree(mumps->id.listvar_schur)); 232 PetscCall(PetscFree(mumps->id.redrhs)); 233 PetscCall(PetscFree(mumps->schur_sol)); 234 mumps->id.size_schur = 0; 235 mumps->id.schur_lld = 0; 236 mumps->id.ICNTL(19) = 0; 237 PetscFunctionReturn(0); 238 } 239 240 /* solve with rhs in mumps->id.redrhs and return in the same location */ 241 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 242 { 243 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 244 Mat S, B, X; 245 MatFactorSchurStatus schurstatus; 246 PetscInt sizesol; 247 248 PetscFunctionBegin; 249 PetscCall(MatFactorFactorizeSchurComplement(F)); 250 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 251 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 252 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 253 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 254 PetscCall(MatBindToCPU(B, S->boundtocpu)); 255 #endif 256 switch (schurstatus) { 257 case MAT_FACTOR_SCHUR_FACTORED: 258 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 259 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 260 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 261 PetscCall(MatBindToCPU(X, S->boundtocpu)); 262 #endif 263 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 264 PetscCall(MatMatSolveTranspose(S, B, X)); 265 } else { 266 PetscCall(MatMatSolve(S, B, X)); 267 } 268 break; 269 case MAT_FACTOR_SCHUR_INVERTED: 270 sizesol = mumps->id.nrhs * mumps->id.size_schur; 271 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 272 PetscCall(PetscFree(mumps->schur_sol)); 273 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 274 mumps->schur_sizesol = sizesol; 275 } 276 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 277 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 278 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 279 PetscCall(MatBindToCPU(X, S->boundtocpu)); 280 #endif 281 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 282 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 283 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 284 } else { 285 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 286 } 287 PetscCall(MatProductSetFromOptions(X)); 288 PetscCall(MatProductSymbolic(X)); 289 PetscCall(MatProductNumeric(X)); 290 291 PetscCall(MatCopy(X, B, 
SAME_NONZERO_PATTERN)); 292 break; 293 default: 294 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 295 } 296 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 297 PetscCall(MatDestroy(&B)); 298 PetscCall(MatDestroy(&X)); 299 PetscFunctionReturn(0); 300 } 301 302 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 303 { 304 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 305 306 PetscFunctionBegin; 307 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 308 PetscFunctionReturn(0); 309 } 310 if (!expansion) { /* prepare for the condensation step */ 311 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 312 /* allocate MUMPS internal array to store reduced right-hand sides */ 313 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 314 PetscCall(PetscFree(mumps->id.redrhs)); 315 mumps->id.lredrhs = mumps->id.size_schur; 316 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 317 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 318 } 319 mumps->id.ICNTL(26) = 1; /* condensation phase */ 320 } else { /* prepare for the expansion step */ 321 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 322 PetscCall(MatMumpsSolveSchur_Private(F)); 323 mumps->id.ICNTL(26) = 2; /* expansion phase */ 324 PetscMUMPS_c(mumps); 325 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 326 /* restore defaults */ 327 mumps->id.ICNTL(26) = -1; 328 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 329 if (mumps->id.nrhs > 1) { 330 PetscCall(PetscFree(mumps->id.redrhs)); 331 mumps->id.lredrhs = 0; 332 mumps->sizeredrhs = 0; 333 } 334 } 335 PetscFunctionReturn(0); 336 } 337 338 /* 339 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 340 341 input: 342 A - matrix in aij,baij or sbaij format 343 shift - 0: C style output triple; 1: Fortran style output triple. 344 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 345 MAT_REUSE_MATRIX: only the values in v array are updated 346 output: 347 nnz - dim of r, c, and v (number of local nonzero entries of A) 348 r, c, v - row and col index, matrix values (matrix triples) 349 350 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 351 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 352 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
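   As a small illustrative example of the output format (values made up): for the 2x2 AIJ matrix
     [4 1]
     [0 3]
   and shift = 1 (Fortran-style indexing), the conversion produces nnz = 3 and the triples
     r = {1, 1, 2},  c = {1, 2, 2},  v = {4.0, 1.0, 3.0},
   i.e. one (row, col, value) entry per stored nonzero with both indices shifted by <shift>.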

*/

PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  mumps->val = (PetscScalar *)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscInt64     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = a->sliidx[a->totalslices];
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  M          = A->rmap->N / bs;
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai = aa->i;
    aj = aa->j;
    nz = bs2 * aa->nz;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

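/*
   Illustrative sketch (not used by the code): with bs = 2, the loops above expand one 2x2 block at block
   row I and block column J of a BAIJ matrix into bs2 = 4 scalar triples with global indices

     row = 2*I + m + shift,  col = 2*J + j + shift,   for j, m in {0, 1},

   because MUMPS only accepts scalar (i, j, v) entries, not blocks.
*/
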
PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscInt64      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool isset, hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscInt64 alloc_size = aa->nz * bs2;

    PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
    if (bs > 1) {
      PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs > 1) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (ajj[j] > i || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
              }
              val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(0);
}

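/*
   The symmetric (Cholesky) converters above and below pass only one triangle to MUMPS. As an illustrative
   example (made-up values), with shift = 1 the symmetric matrix
     [2 1]
     [1 5]
   becomes r = {1, 1, 2}, c = {1, 2, 2}, v = {2.0, 1.0, 5.0}; the (2,1) entry is implied by symmetry and
   is not stored.
*/
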
PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j;
  const PetscScalar *av, *v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
  PetscBool          missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i + 1] - adiag[i];
        }
      }
    } else {
      for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) val[nz++] = v1[j];
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) val[nz++] = v1[j];
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
  PetscInt           bs;
  PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A, &bs));
  rstart = A->rmap->rstart;
  ai     = aa->i;
  aj     = aa->j;
  bi     = bb->i;
  bj     = bb->j;
  av     = aa->a;
  bv     = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    /* cannot decide the exact mumps->nnz now because of the SBAIJ format */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i] * bs2;
    v2     = bv + bi[i] * bs2;

    if (bs > 1) {
      /* A-part */
      for (j = 0; j < countA; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (rstart + ajj[j] * bs > irow || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs];
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] *
bs + k + shift, &col[jj])); 709 } 710 val[jj++] = v2[j * bs2 + m + k * bs]; 711 } 712 } 713 } 714 } else { 715 /* A-part */ 716 for (j = 0; j < countA; j++) { 717 if (reuse == MAT_INITIAL_MATRIX) { 718 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 719 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 720 } 721 val[jj++] = v1[j]; 722 } 723 724 /* B-part */ 725 for (j = 0; j < countB; j++) { 726 if (reuse == MAT_INITIAL_MATRIX) { 727 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 728 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 729 } 730 val[jj++] = v2[j]; 731 } 732 } 733 irow += bs; 734 } 735 mumps->nnz = jj; 736 PetscFunctionReturn(0); 737 } 738 739 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 740 { 741 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 742 PetscInt64 rstart, nz, i, j, jj, irow, countA, countB; 743 PetscMUMPSInt *row, *col; 744 const PetscScalar *av, *bv, *v1, *v2; 745 PetscScalar *val; 746 Mat Ad, Ao; 747 Mat_SeqAIJ *aa; 748 Mat_SeqAIJ *bb; 749 750 PetscFunctionBegin; 751 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 752 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 753 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 754 755 aa = (Mat_SeqAIJ *)(Ad)->data; 756 bb = (Mat_SeqAIJ *)(Ao)->data; 757 ai = aa->i; 758 aj = aa->j; 759 bi = bb->i; 760 bj = bb->j; 761 762 rstart = A->rmap->rstart; 763 764 if (reuse == MAT_INITIAL_MATRIX) { 765 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 766 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 767 PetscCall(PetscMalloc1(nz, &val)); 768 mumps->nnz = nz; 769 mumps->irn = row; 770 mumps->jcn = col; 771 mumps->val = mumps->val_alloc = val; 772 } else { 773 val = mumps->val; 774 } 775 776 jj = 0; 777 irow = rstart; 778 for (i = 0; i < m; i++) { 779 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 780 countA = ai[i + 1] - ai[i]; 781 countB = bi[i + 1] - bi[i]; 782 bjj = bj + bi[i]; 783 v1 = av + ai[i]; 784 v2 = bv + bi[i]; 785 786 /* A-part */ 787 for (j = 0; j < countA; j++) { 788 if (reuse == MAT_INITIAL_MATRIX) { 789 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 790 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 791 } 792 val[jj++] = v1[j]; 793 } 794 795 /* B-part */ 796 for (j = 0; j < countB; j++) { 797 if (reuse == MAT_INITIAL_MATRIX) { 798 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 799 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 800 } 801 val[jj++] = v2[j]; 802 } 803 irow++; 804 } 805 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 806 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 807 PetscFunctionReturn(0); 808 } 809 810 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 811 { 812 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 813 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 814 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 815 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 816 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart; 817 const PetscInt bs2 = mat->bs2; 818 PetscInt bs; 819 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 820 PetscMUMPSInt *row, *col; 821 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 822 PetscScalar *val; 823 824 PetscFunctionBegin; 825 PetscCall(MatGetBlockSize(A, &bs)); 826 if (reuse == MAT_INITIAL_MATRIX) { 827 nz = bs2 * (aa->nz + bb->nz); 828 
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa    = (Mat_SeqAIJ *)(Ad)->data;
  bb    = (Mat_SeqAIJ *)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i = 0; i < m; i++) {
      nza += (ai[i + 1] - adiag[i]);
      countB = bi[i + 1] - bi[i];
      bjj    = bj + bi[i];
      for (j = 0; j < countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of upper triangular part of mat */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i + 1] - adiag[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] +
shift, &col[jj])); 951 } 952 val[jj++] = v1[j]; 953 } 954 955 /* B-part */ 956 for (j = 0; j < countB; j++) { 957 if (garray[bjj[j]] > rstart) { 958 if (reuse == MAT_INITIAL_MATRIX) { 959 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 960 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 961 } 962 val[jj++] = v2[j]; 963 } 964 } 965 irow++; 966 } 967 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 968 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 969 PetscFunctionReturn(0); 970 } 971 972 PetscErrorCode MatDestroy_MUMPS(Mat A) 973 { 974 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 975 976 PetscFunctionBegin; 977 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 978 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 979 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 980 PetscCall(VecDestroy(&mumps->b_seq)); 981 PetscCall(VecDestroy(&mumps->x_seq)); 982 PetscCall(PetscFree(mumps->id.perm_in)); 983 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 984 PetscCall(PetscFree(mumps->val_alloc)); 985 PetscCall(PetscFree(mumps->info)); 986 PetscCall(PetscFree(mumps->ICNTL_pre)); 987 PetscCall(PetscFree(mumps->CNTL_pre)); 988 PetscCall(MatMumpsResetSchur_Private(mumps)); 989 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 990 mumps->id.job = JOB_END; 991 PetscMUMPS_c(mumps); 992 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 993 if (mumps->mumps_comm != MPI_COMM_NULL) { 994 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 995 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 996 } 997 } 998 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 999 if (mumps->use_petsc_omp_support) { 1000 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1001 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1002 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1003 } 1004 #endif 1005 PetscCall(PetscFree(mumps->ia_alloc)); 1006 PetscCall(PetscFree(mumps->ja_alloc)); 1007 PetscCall(PetscFree(mumps->recvcount)); 1008 PetscCall(PetscFree(mumps->reqs)); 1009 PetscCall(PetscFree(mumps->irhs_loc)); 1010 PetscCall(PetscFree(A->data)); 1011 1012 /* clear composed functions */ 1013 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1014 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1015 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1016 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1017 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1018 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1019 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1020 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1021 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1022 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1023 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1024 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1025 PetscCall(PetscObjectComposeFunction((PetscObject)A, 
"MatMumpsGetInverseTranspose_C", NULL)); 1026 PetscFunctionReturn(0); 1027 } 1028 1029 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1030 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1031 { 1032 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1033 const PetscMPIInt ompsize = mumps->omp_comm_size; 1034 PetscInt i, m, M, rstart; 1035 1036 PetscFunctionBegin; 1037 PetscCall(MatGetSize(A, &M, NULL)); 1038 PetscCall(MatGetLocalSize(A, &m, NULL)); 1039 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1040 if (ompsize == 1) { 1041 if (!mumps->irhs_loc) { 1042 mumps->nloc_rhs = m; 1043 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1044 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1045 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1046 } 1047 mumps->id.rhs_loc = (MumpsScalar *)array; 1048 } else { 1049 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1050 const PetscInt *ranges; 1051 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1052 MPI_Group petsc_group, omp_group; 1053 PetscScalar *recvbuf = NULL; 1054 1055 if (mumps->is_omp_master) { 1056 /* Lazily initialize the omp stuff for distributed rhs */ 1057 if (!mumps->irhs_loc) { 1058 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1059 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1060 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1061 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1062 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1063 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1064 1065 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1066 mumps->nloc_rhs = 0; 1067 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1068 for (j = 0; j < ompsize; j++) { 1069 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1070 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1071 } 1072 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1073 for (j = k = 0; j < ompsize; j++) { 1074 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1075 } 1076 1077 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1078 PetscCallMPI(MPI_Group_free(&petsc_group)); 1079 PetscCallMPI(MPI_Group_free(&omp_group)); 1080 } 1081 1082 /* Realloc buffers when current nrhs is bigger than what we have met */ 1083 if (nrhs > mumps->max_nrhs) { 1084 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1085 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1086 mumps->max_nrhs = nrhs; 1087 } 1088 1089 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1090 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1091 mumps->rhs_disps[0] = 0; 1092 for (j = 1; j < ompsize; j++) { 1093 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1094 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1095 } 1096 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. 
Single rhs is common in Ax=b */ 1097 } 1098 1099 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1100 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1101 1102 if (mumps->is_omp_master) { 1103 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1104 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1105 for (j = 0; j < ompsize; j++) { 1106 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1107 dst = dstbase; 1108 for (i = 0; i < nrhs; i++) { 1109 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1110 src += mumps->rhs_nrow[j]; 1111 dst += mumps->nloc_rhs; 1112 } 1113 dstbase += mumps->rhs_nrow[j]; 1114 } 1115 } 1116 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1117 } 1118 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1119 } 1120 mumps->id.nrhs = nrhs; 1121 mumps->id.nloc_rhs = mumps->nloc_rhs; 1122 mumps->id.lrhs_loc = mumps->nloc_rhs; 1123 mumps->id.irhs_loc = mumps->irhs_loc; 1124 PetscFunctionReturn(0); 1125 } 1126 1127 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1128 { 1129 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1130 const PetscScalar *rarray = NULL; 1131 PetscScalar *array; 1132 IS is_iden, is_petsc; 1133 PetscInt i; 1134 PetscBool second_solve = PETSC_FALSE; 1135 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1136 1137 PetscFunctionBegin; 1138 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1139 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1140 &cite1)); 1141 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1142 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1143 &cite2)); 1144 1145 if (A->factorerrortype) { 1146 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1147 PetscCall(VecSetInf(x)); 1148 PetscFunctionReturn(0); 1149 } 1150 1151 mumps->id.nrhs = 1; 1152 if (mumps->petsc_size > 1) { 1153 if (mumps->ICNTL20 == 10) { 1154 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1155 PetscCall(VecGetArrayRead(b, &rarray)); 1156 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1157 } else { 1158 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1159 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1160 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1161 if (!mumps->myid) { 1162 PetscCall(VecGetArray(mumps->b_seq, &array)); 1163 mumps->id.rhs = (MumpsScalar *)array; 1164 } 1165 } 1166 } else { /* petsc_size == 1 */ 1167 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1168 PetscCall(VecCopy(b, x)); 1169 PetscCall(VecGetArray(x, &array)); 1170 mumps->id.rhs = (MumpsScalar *)array; 1171 } 1172 1173 /* 1174 handle condensation step of Schur complement (if any) 1175 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 
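     (In this file, MatMumpsHandleSchur_Private() later switches ICNTL(26) to 1 for the condensation step
     and to 2 for the expansion step.)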
1176 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1177 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1178 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1179 */ 1180 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1181 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1182 second_solve = PETSC_TRUE; 1183 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1184 } 1185 /* solve phase */ 1186 /*-------------*/ 1187 mumps->id.job = JOB_SOLVE; 1188 PetscMUMPS_c(mumps); 1189 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1190 1191 /* handle expansion step of Schur complement (if any) */ 1192 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1193 1194 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1195 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1196 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1197 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1198 } 1199 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1200 PetscInt *isol2_loc = NULL; 1201 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1202 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1203 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1204 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1205 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1206 PetscCall(ISDestroy(&is_iden)); 1207 PetscCall(ISDestroy(&is_petsc)); 1208 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1209 } 1210 1211 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1212 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1213 } 1214 1215 if (mumps->petsc_size > 1) { 1216 if (mumps->ICNTL20 == 10) { 1217 PetscCall(VecRestoreArrayRead(b, &rarray)); 1218 } else if (!mumps->myid) { 1219 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1220 } 1221 } else PetscCall(VecRestoreArray(x, &array)); 1222 1223 PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3))); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1228 { 1229 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1230 1231 PetscFunctionBegin; 1232 mumps->id.ICNTL(9) = 0; 1233 PetscCall(MatSolve_MUMPS(A, b, x)); 1234 mumps->id.ICNTL(9) = 1; 1235 PetscFunctionReturn(0); 1236 } 1237 1238 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1239 { 1240 Mat Bt = NULL; 1241 PetscBool denseX, denseB, flg, flgT; 1242 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1243 PetscInt i, nrhs, M; 1244 PetscScalar *array; 1245 const PetscScalar *rbray; 1246 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1247 PetscMUMPSInt *isol_loc, *isol_loc_save; 1248 PetscScalar *bray, *sol_loc, *sol_loc_save; 1249 IS is_to, is_from; 1250 PetscInt k, proc, j, m, myrstart; 1251 const PetscInt *rstart; 1252 Vec v_mpi, msol_loc; 1253 VecScatter 
scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                   because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      PetscCall(MatTransposeGetMat(B, &Bt));
    } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B, &M, &nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt     spnr, *ia, *ja;
    PetscBool    second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X, &array));
    mumps->id.rhs = (MumpsScalar *)array;

    if (denseB) {
      /* copy B to X */
      PetscCall(MatDenseGetArrayRead(B, &rbray));
      PetscCall(PetscArraycpy(array, rbray, M * nrhs));
      PetscCall(MatDenseRestoreArrayRead(B, &rbray));
    } else { /* sparse B */
      PetscCall(MatSeqAIJGetArray(Bt, &aa));
      PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar *)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
    if (!denseB) { /* sparse B */
      PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
      PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
    PetscCall(MatDenseRestoreArray(X, &array));
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host!
--------*/ 1326 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1327 1328 /* create msol_loc to hold mumps local solution */ 1329 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1330 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1331 1332 lsol_loc = mumps->id.lsol_loc; 1333 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1334 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1335 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1336 mumps->id.isol_loc = isol_loc; 1337 1338 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1339 1340 if (denseB) { 1341 if (mumps->ICNTL20 == 10) { 1342 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1343 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1344 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1345 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1346 PetscCall(MatGetLocalSize(B, &m, NULL)); 1347 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1348 } else { 1349 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1350 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1351 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1352 0, re-arrange B into desired order, which is a local operation. 1353 */ 1354 1355 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1356 /* wrap dense rhs matrix B into a vector v_mpi */ 1357 PetscCall(MatGetLocalSize(B, &m, NULL)); 1358 PetscCall(MatDenseGetArray(B, &bray)); 1359 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1360 PetscCall(MatDenseRestoreArray(B, &bray)); 1361 1362 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1363 if (!mumps->myid) { 1364 PetscInt *idx; 1365 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1366 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1367 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1368 k = 0; 1369 for (proc = 0; proc < mumps->petsc_size; proc++) { 1370 for (j = 0; j < nrhs; j++) { 1371 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1372 } 1373 } 1374 1375 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1376 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1377 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1378 } else { 1379 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1380 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1381 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1382 } 1383 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1384 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1385 PetscCall(ISDestroy(&is_to)); 1386 PetscCall(ISDestroy(&is_from)); 1387 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1388 1389 if (!mumps->myid) { /* define rhs on the host */ 1390 PetscCall(VecGetArray(b_seq, &bray)); 1391 mumps->id.rhs = (MumpsScalar *)bray; 1392 PetscCall(VecRestoreArray(b_seq, &bray)); 1393 } 1394 } 1395 } else { /* sparse B */ 1396 b = (Mat_MPIAIJ *)Bt->data; 1397 1398 /* wrap dense X into a vector v_mpi */ 1399 PetscCall(MatGetLocalSize(X, &m, NULL)); 1400 PetscCall(MatDenseGetArray(X, &bray)); 1401 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1402 PetscCall(MatDenseRestoreArray(X, &bray)); 1403 1404 if (!mumps->myid) { 1405 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1406 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1407 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1408 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1409 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1410 } else { 1411 mumps->id.irhs_ptr = NULL; 1412 mumps->id.irhs_sparse = NULL; 1413 mumps->id.nz_rhs = 0; 1414 mumps->id.rhs_sparse = NULL; 1415 } 1416 } 1417 1418 /* solve phase */ 1419 /*-------------*/ 1420 mumps->id.job = JOB_SOLVE; 1421 PetscMUMPS_c(mumps); 1422 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1423 1424 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1425 PetscCall(MatDenseGetArray(X, &array)); 1426 PetscCall(VecPlaceArray(v_mpi, array)); 1427 1428 /* create scatter scat_sol */ 1429 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1430 /* iidx: index for scatter mumps solution to petsc X */ 1431 1432 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1433 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1434 for (i = 0; i < lsol_loc; i++) { 1435 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1436 1437 for (proc = 0; proc < mumps->petsc_size; proc++) { 1438 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1439 myrstart = rstart[proc]; 1440 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1441 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1442 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1443 break; 1444 } 1445 } 1446 1447 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1448 } 1449 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1450 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1451 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1452 PetscCall(ISDestroy(&is_from)); 1453 PetscCall(ISDestroy(&is_to)); 1454 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1455 PetscCall(MatDenseRestoreArray(X, &array)); 1456 1457 /* free spaces */ 1458 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1459 mumps->id.isol_loc = isol_loc_save; 1460 1461 PetscCall(PetscFree2(sol_loc, isol_loc)); 1462 PetscCall(PetscFree(idxx)); 1463 PetscCall(VecDestroy(&msol_loc)); 1464 PetscCall(VecDestroy(&v_mpi)); 1465 if (!denseB) { 1466 if (!mumps->myid) { 1467 b = (Mat_MPIAIJ *)Bt->data; 1468 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1469 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1470 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1471 } 1472 } else { 1473 if (mumps->ICNTL20 == 0) { 1474 PetscCall(VecDestroy(&b_seq)); 1475 PetscCall(VecScatterDestroy(&scat_rhs)); 1476 } 1477 } 1478 PetscCall(VecScatterDestroy(&scat_sol)); 1479 PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3))); 1480 PetscFunctionReturn(0); 1481 } 1482 1483 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1484 { 1485 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1486 PetscMUMPSInt oldvalue = mumps->id.ICNTL(9); 1487 1488 PetscFunctionBegin; 1489 mumps->id.ICNTL(9) = 0; 1490 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1491 mumps->id.ICNTL(9) = oldvalue; 1492 PetscFunctionReturn(0); 1493 } 1494 1495 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1496 { 1497 PetscBool flg; 1498 Mat B; 1499 1500 PetscFunctionBegin; 1501 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1502 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1503 1504 /* Create B=Bt^T that uses Bt's data structure */ 1505 PetscCall(MatCreateTranspose(Bt, &B)); 1506 1507 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1508 PetscCall(MatDestroy(&B)); 1509 PetscFunctionReturn(0); 1510 } 1511 1512 #if !defined(PETSC_USE_COMPLEX) 1513 /* 1514 input: 1515 F: numeric factor 1516 output: 1517 nneg: total number of negative pivots 1518 nzero: total number of zero pivots 1519 npos: (global dimension of F) - nneg - nzero 1520 */ 1521 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1522 { 1523 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1524 PetscMPIInt size; 1525 1526 PetscFunctionBegin; 1527 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1528 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense 
matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1529   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1530
1531   if (nneg) *nneg = mumps->id.INFOG(12);
1532   if (nzero || npos) {
1533     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1534     if (nzero) *nzero = mumps->id.INFOG(28);
1535     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1536   }
1537   PetscFunctionReturn(0);
1538 }
1539 #endif
1540
1541 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1542 {
1543   PetscInt i, nreqs;
1544   PetscMUMPSInt *irn, *jcn;
1545   PetscMPIInt count;
1546   PetscInt64 totnnz, remain;
1547   const PetscInt osize = mumps->omp_comm_size;
1548   PetscScalar *val;
1549
1550   PetscFunctionBegin;
1551   if (osize > 1) {
1552     if (reuse == MAT_INITIAL_MATRIX) {
1553       /* master first gathers counts of nonzeros to receive */
1554       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1555       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1556
1557       /* Then each rank computes the number of sends/recvs it needs */
1558       if (mumps->is_omp_master) {
1559         /* Start from 1 since the master's own data needs no MPI communication */
1560         nreqs = 0;
1561         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1562       } else {
1563         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1564       }
1565       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1566
1567       /* The following code is doing a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1568          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1569          might even be a prime number > 2^31-1, we have to slice the messages. Note omp_comm_size
1570          is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
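         As a hypothetical illustration (the numbers are made up for this comment): if a sending rank holds
         nnz = 5,000,000,000 triplet entries and PETSC_MPI_INT_MAX is 2,147,483,647, that rank posts three Isend's per
         array (irn, jcn and val) with counts 2,147,483,647, 2,147,483,647 and 705,032,706, advancing the buffer
         pointer after each slice, while the master posts the matching Irecv's.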
1571 */ 1572 nreqs = 0; /* counter for actual send/recvs */ 1573 if (mumps->is_omp_master) { 1574 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1575 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1576 PetscCall(PetscMalloc1(totnnz, &val)); 1577 1578 /* Self communication */ 1579 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1580 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1581 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1582 1583 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1584 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1585 PetscCall(PetscFree(mumps->val_alloc)); 1586 mumps->nnz = totnnz; 1587 mumps->irn = irn; 1588 mumps->jcn = jcn; 1589 mumps->val = mumps->val_alloc = val; 1590 1591 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1592 jcn += mumps->recvcount[0]; 1593 val += mumps->recvcount[0]; 1594 1595 /* Remote communication */ 1596 for (i = 1; i < osize; i++) { 1597 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1598 remain = mumps->recvcount[i] - count; 1599 while (count > 0) { 1600 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1601 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1602 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1603 irn += count; 1604 jcn += count; 1605 val += count; 1606 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1607 remain -= count; 1608 } 1609 } 1610 } else { 1611 irn = mumps->irn; 1612 jcn = mumps->jcn; 1613 val = mumps->val; 1614 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1615 remain = mumps->nnz - count; 1616 while (count > 0) { 1617 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1618 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1619 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1620 irn += count; 1621 jcn += count; 1622 val += count; 1623 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1624 remain -= count; 1625 } 1626 } 1627 } else { 1628 nreqs = 0; 1629 if (mumps->is_omp_master) { 1630 val = mumps->val + mumps->recvcount[0]; 1631 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1632 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1633 remain = mumps->recvcount[i] - count; 1634 while (count > 0) { 1635 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1636 val += count; 1637 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1638 remain -= count; 1639 } 1640 } 1641 } else { 1642 val = mumps->val; 1643 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1644 remain = mumps->nnz - count; 1645 while (count > 0) { 1646 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1647 val += count; 1648 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1649 remain -= count; 1650 } 1651 } 1652 } 1653 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1654 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1655 } 1656 PetscFunctionReturn(0); 1657 } 1658 1659 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo 
*info)
1660 {
1661   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1662   PetscBool isMPIAIJ;
1663
1664   PetscFunctionBegin;
1665   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* analysis failed and the matrix is not simply empty (error -16 with INFO(2)=N=0) */
1666     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1667     PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1668     PetscFunctionReturn(0);
1669   }
1670
1671   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1672   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1673
1674   /* numerical factorization phase */
1675   /*-------------------------------*/
1676   mumps->id.job = JOB_FACTNUMERIC;
1677   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1678     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1679   } else {
1680     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1681   }
1682   PetscMUMPS_c(mumps);
1683   if (mumps->id.INFOG(1) < 0) {
1684     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1685     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1686       PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1687       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1688     } else if (mumps->id.INFOG(1) == -13) {
1689       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1690       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1691     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1692       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1693       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1694     } else {
1695       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1696       F->factorerrortype = MAT_FACTOR_OTHER;
1697     }
1698   }
1699   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "mumps->id.ICNTL(16) = %d", mumps->id.ICNTL(16));
1700
1701   F->assembled = PETSC_TRUE;
1702
1703   if (F->schur) { /* reset Schur status to unfactored */
1704 #if defined(PETSC_HAVE_CUDA)
1705     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1706 #endif
1707     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1708       mumps->id.ICNTL(19) = 2;
1709       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1710     }
1711     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1712   }
1713
1714   /* ensure that the ICNTL(19) value returned by MatMumpsGetIcntl() is always consistent */
1715   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1716
1717   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1718   if (mumps->petsc_size > 1) {
1719     PetscInt lsol_loc;
1720     PetscScalar *sol_loc;
1721
1722     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1723
1724     /* distributed solution; create x_seq=sol_loc for repeated use */
1725     if (mumps->x_seq) {
1726
PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1727 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1728 PetscCall(VecDestroy(&mumps->x_seq)); 1729 } 1730 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1731 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1732 mumps->id.lsol_loc = lsol_loc; 1733 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1734 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1735 } 1736 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1737 PetscFunctionReturn(0); 1738 } 1739 1740 /* Sets MUMPS options from the options database */ 1741 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1742 { 1743 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1744 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1745 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1746 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1747 MumpsScalar *arr; 1748 1749 PetscFunctionBegin; 1750 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1751 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1752 PetscInt nthreads = 0; 1753 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1754 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1755 1756 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1757 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1758 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1759 1760 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1761 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1762 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1763 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1764 if (mumps->use_petsc_omp_support) { 1765 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 1766 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1767 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1768 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1769 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1770 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1771 #endif 1772 } else { 1773 mumps->omp_comm = PETSC_COMM_SELF; 1774 mumps->mumps_comm = mumps->petsc_comm; 1775 mumps->is_omp_master = PETSC_TRUE; 1776 } 1777 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1778 mumps->reqs = NULL; 1779 mumps->tag = 0; 1780 1781 if (mumps->mumps_comm != MPI_COMM_NULL) { 1782 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1783 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
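         Without a private duplicate, nonblocking messages posted by MUMPS and by PETSc on the same communicator could
         be matched against each other whenever their tags happen to coincide.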
*/ 1784 MPI_Comm comm; 1785 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 1786 mumps->mumps_comm = comm; 1787 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 1788 } 1789 1790 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1791 mumps->id.job = JOB_INIT; 1792 mumps->id.par = 1; /* host participates factorizaton and solve */ 1793 mumps->id.sym = mumps->sym; 1794 1795 size = mumps->id.size_schur; 1796 arr = mumps->id.schur; 1797 listvar_schur = mumps->id.listvar_schur; 1798 PetscMUMPS_c(mumps); 1799 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1800 /* restore cached ICNTL and CNTL values */ 1801 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 1802 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 1803 PetscCall(PetscFree(mumps->ICNTL_pre)); 1804 PetscCall(PetscFree(mumps->CNTL_pre)); 1805 1806 if (schur) { 1807 mumps->id.size_schur = size; 1808 mumps->id.schur_lld = size; 1809 mumps->id.schur = arr; 1810 mumps->id.listvar_schur = listvar_schur; 1811 if (mumps->petsc_size > 1) { 1812 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 1813 1814 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1815 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1816 PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 1817 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 1818 } else { 1819 if (F->factortype == MAT_FACTOR_LU) { 1820 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1821 } else { 1822 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1823 } 1824 } 1825 mumps->id.ICNTL(26) = -1; 1826 } 1827 1828 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1829 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
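     (Only the omp master ranks execute the JOB_INIT call above, so without this broadcast the remaining ranks in
     omp_comm would never see the defaults MUMPS filled in.)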
1830 */ 1831 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 1832 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 1833 1834 mumps->scat_rhs = NULL; 1835 mumps->scat_sol = NULL; 1836 1837 /* set PETSc-MUMPS default options - override MUMPS default */ 1838 mumps->id.ICNTL(3) = 0; 1839 mumps->id.ICNTL(4) = 0; 1840 if (mumps->petsc_size == 1) { 1841 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1842 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1843 } else { 1844 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1845 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1846 } 1847 } 1848 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 1849 if (flg) mumps->id.ICNTL(1) = icntl; 1850 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 1851 if (flg) mumps->id.ICNTL(2) = icntl; 1852 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 1853 if (flg) mumps->id.ICNTL(3) = icntl; 1854 1855 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 1856 if (flg) mumps->id.ICNTL(4) = icntl; 1857 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1858 1859 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 1860 if (flg) mumps->id.ICNTL(6) = icntl; 1861 1862 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 1863 if (flg) { 1864 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1865 mumps->id.ICNTL(7) = icntl; 1866 } 1867 1868 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 1869 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 1870 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 1871 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 1872 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 1873 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 1874 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 1875 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 1876 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 1877 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 1878 if (flg) { 1879 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 1880 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 1881 } 1882 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 1883 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1884 PetscCall(MatDestroy(&F->schur)); 1885 PetscCall(MatMumpsResetSchur_Private(mumps)); 1886 } 1887 1888 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1889 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1890 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1891 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1892 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1893 In short, we could not use distributed RHS with MPICH until v4.0b1. 
1894 */
1895 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1896   mumps->ICNTL20 = 0; /* Centralized dense RHS */
1897 #else
1898   mumps->ICNTL20 = 10; /* Distributed dense RHS */
1899 #endif
1900   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give MUMPS centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
1901   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
1902 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1903   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1904 #endif
1905   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vectors */
1906
1907   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
1908   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
1909   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
1910   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
1911
1912   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
1913   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix has been computed", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
1914   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
1915   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
1916   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
1917   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1918   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
1919   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */
1920   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None",
mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
1921   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
1922   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
1923   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
1924
1925   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
1926   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
1927   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
1928   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
1929   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
1930   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
1931
1932   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out-of-core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1933
1934   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
1935   if (ninfo) {
1936     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
1937     PetscCall(PetscMalloc1(ninfo, &mumps->info));
1938     mumps->ninfo = ninfo;
1939     for (i = 0; i < ninfo; i++) {
1940       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
1941       mumps->info[i] = info[i];
1942     }
1943   }
1944   PetscOptionsEnd();
1945   PetscFunctionReturn(0);
1946 }
1947
1948 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
1949 {
1950   PetscFunctionBegin;
1951   if (mumps->id.INFOG(1) < 0) {
1952     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
1953     if (mumps->id.INFOG(1) == -6) {
1954       PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1955       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1956     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1957       PetscCall(PetscInfo(F, "problem with workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1958       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1959     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* empty matrix: error -16 with INFO(2)=N=0 */
1960       PetscCall(PetscInfo(F, "Empty matrix\n"));
1961     } else {
1962       PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1963       F->factorerrortype = MAT_FACTOR_OTHER;
1964     }
1965   }
1966   PetscFunctionReturn(0);
1967 }
1968
1969 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const
MatFactorInfo *info) 1970 { 1971 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1972 Vec b; 1973 const PetscInt M = A->rmap->N; 1974 1975 PetscFunctionBegin; 1976 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1977 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1978 PetscFunctionReturn(0); 1979 } 1980 1981 /* Set MUMPS options from the options database */ 1982 PetscCall(MatSetFromOptions_MUMPS(F, A)); 1983 1984 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 1985 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 1986 1987 /* analysis phase */ 1988 /*----------------*/ 1989 mumps->id.job = JOB_FACTSYMBOLIC; 1990 mumps->id.n = M; 1991 switch (mumps->id.ICNTL(18)) { 1992 case 0: /* centralized assembled matrix input */ 1993 if (!mumps->myid) { 1994 mumps->id.nnz = mumps->nnz; 1995 mumps->id.irn = mumps->irn; 1996 mumps->id.jcn = mumps->jcn; 1997 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 1998 if (r) { 1999 mumps->id.ICNTL(7) = 1; 2000 if (!mumps->myid) { 2001 const PetscInt *idx; 2002 PetscInt i; 2003 2004 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2005 PetscCall(ISGetIndices(r, &idx)); 2006 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2007 PetscCall(ISRestoreIndices(r, &idx)); 2008 } 2009 } 2010 } 2011 break; 2012 case 3: /* distributed assembled matrix input (size>1) */ 2013 mumps->id.nnz_loc = mumps->nnz; 2014 mumps->id.irn_loc = mumps->irn; 2015 mumps->id.jcn_loc = mumps->jcn; 2016 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2017 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2018 PetscCall(MatCreateVecs(A, NULL, &b)); 2019 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2020 PetscCall(VecDestroy(&b)); 2021 } 2022 break; 2023 } 2024 PetscMUMPS_c(mumps); 2025 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2026 2027 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2028 F->ops->solve = MatSolve_MUMPS; 2029 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2030 F->ops->matsolve = MatMatSolve_MUMPS; 2031 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2032 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2033 2034 mumps->matstruc = SAME_NONZERO_PATTERN; 2035 PetscFunctionReturn(0); 2036 } 2037 2038 /* Note the Petsc r and c permutations are ignored */ 2039 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2040 { 2041 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2042 Vec b; 2043 const PetscInt M = A->rmap->N; 2044 2045 PetscFunctionBegin; 2046 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2047 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2048 PetscFunctionReturn(0); 2049 } 2050 2051 /* Set MUMPS options from the options database */ 2052 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2053 2054 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2055 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2056 2057 /* analysis phase */ 2058 /*----------------*/ 2059 mumps->id.job = JOB_FACTSYMBOLIC; 2060 mumps->id.n = M; 2061 switch (mumps->id.ICNTL(18)) { 2062 case 0: /* centralized assembled matrix input */ 2063 if (!mumps->myid) { 2064 mumps->id.nnz = mumps->nnz; 2065 mumps->id.irn = mumps->irn; 2066 mumps->id.jcn = mumps->jcn; 2067 if (mumps->id.ICNTL(6) > 1) 
mumps->id.a = (MumpsScalar *)mumps->val; 2068 } 2069 break; 2070 case 3: /* distributed assembled matrix input (size>1) */ 2071 mumps->id.nnz_loc = mumps->nnz; 2072 mumps->id.irn_loc = mumps->irn; 2073 mumps->id.jcn_loc = mumps->jcn; 2074 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2075 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2076 PetscCall(MatCreateVecs(A, NULL, &b)); 2077 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2078 PetscCall(VecDestroy(&b)); 2079 } 2080 break; 2081 } 2082 PetscMUMPS_c(mumps); 2083 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2084 2085 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2086 F->ops->solve = MatSolve_MUMPS; 2087 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2088 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2089 2090 mumps->matstruc = SAME_NONZERO_PATTERN; 2091 PetscFunctionReturn(0); 2092 } 2093 2094 /* Note the Petsc r permutation and factor info are ignored */ 2095 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2096 { 2097 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2098 Vec b; 2099 const PetscInt M = A->rmap->N; 2100 2101 PetscFunctionBegin; 2102 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2103 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2104 PetscFunctionReturn(0); 2105 } 2106 2107 /* Set MUMPS options from the options database */ 2108 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2109 2110 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2111 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2112 2113 /* analysis phase */ 2114 /*----------------*/ 2115 mumps->id.job = JOB_FACTSYMBOLIC; 2116 mumps->id.n = M; 2117 switch (mumps->id.ICNTL(18)) { 2118 case 0: /* centralized assembled matrix input */ 2119 if (!mumps->myid) { 2120 mumps->id.nnz = mumps->nnz; 2121 mumps->id.irn = mumps->irn; 2122 mumps->id.jcn = mumps->jcn; 2123 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2124 } 2125 break; 2126 case 3: /* distributed assembled matrix input (size>1) */ 2127 mumps->id.nnz_loc = mumps->nnz; 2128 mumps->id.irn_loc = mumps->irn; 2129 mumps->id.jcn_loc = mumps->jcn; 2130 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2131 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
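(That is, ICNTL(20)=0: the whole dense right-hand side must be assembled on the host, hence the scatter-to-zero created below.)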
Create scatter scat_rhs for repeated use in MatSolve() */ 2132 PetscCall(MatCreateVecs(A, NULL, &b)); 2133 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2134 PetscCall(VecDestroy(&b)); 2135 } 2136 break; 2137 } 2138 PetscMUMPS_c(mumps); 2139 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2140 2141 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2142 F->ops->solve = MatSolve_MUMPS; 2143 F->ops->solvetranspose = MatSolve_MUMPS; 2144 F->ops->matsolve = MatMatSolve_MUMPS; 2145 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2146 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2147 #if defined(PETSC_USE_COMPLEX) 2148 F->ops->getinertia = NULL; 2149 #else 2150 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2151 #endif 2152 2153 mumps->matstruc = SAME_NONZERO_PATTERN; 2154 PetscFunctionReturn(0); 2155 } 2156 2157 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2158 { 2159 PetscBool iascii; 2160 PetscViewerFormat format; 2161 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2162 2163 PetscFunctionBegin; 2164 /* check if matrix is mumps type */ 2165 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2166 2167 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2168 if (iascii) { 2169 PetscCall(PetscViewerGetFormat(viewer, &format)); 2170 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2171 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2172 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2173 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2174 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2175 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2176 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2177 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2178 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2179 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2180 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2181 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2182 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2183 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2184 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2185 if (mumps->id.ICNTL(11) > 0) { 2186 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2187 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2188 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2189 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2190 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2191 PetscCall(PetscViewerASCIIPrintf(viewer, " 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2192 } 2193 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2194 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2195 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2196 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2197 /* ICNTL(15-17) not used */ 2198 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2199 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2200 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2201 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2202 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2203 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2204 2205 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2206 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2207 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2208 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2209 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2210 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2211 2212 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2213 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2214 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2215 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2216 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2217 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2218 2219 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2220 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2221 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2222 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2223 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2224 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2225 2226 /* information local to 
each processor */ 2227 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2228 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2229 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2230 PetscCall(PetscViewerFlush(viewer)); 2231 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2232 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2233 PetscCall(PetscViewerFlush(viewer)); 2234 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2235 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2236 PetscCall(PetscViewerFlush(viewer)); 2237 2238 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2239 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2240 PetscCall(PetscViewerFlush(viewer)); 2241 2242 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2243 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2244 PetscCall(PetscViewerFlush(viewer)); 2245 2246 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2247 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2248 PetscCall(PetscViewerFlush(viewer)); 2249 2250 if (mumps->ninfo && mumps->ninfo <= 80) { 2251 PetscInt i; 2252 for (i = 0; i < mumps->ninfo; i++) { 2253 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2254 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2255 PetscCall(PetscViewerFlush(viewer)); 2256 } 2257 } 2258 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2259 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2260 2261 if (mumps->myid == 0) { /* information from the host */ 2262 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2263 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2264 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2265 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2266 2267 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2268 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2269 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2270 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2271 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2272 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2273 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2274 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2275 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2276 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2277 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2278 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2279 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2280 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2281 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2282 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2283 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2284 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 
2285 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2286 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2287 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2288 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2289 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2290 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2291 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2292 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2293 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2294 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2295 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2296 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2297 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2298 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2299 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2300 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2301 } 2302 } 2303 } 2304 PetscFunctionReturn(0); 2305 } 2306 2307 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2308 { 2309 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2310 2311 PetscFunctionBegin; 2312 info->block_size = 1.0; 2313 info->nz_allocated = mumps->id.INFOG(20); 2314 info->nz_used = mumps->id.INFOG(20); 2315 info->nz_unneeded = 0.0; 2316 info->assemblies = 0.0; 2317 info->mallocs = 0.0; 2318 info->memory = 0.0; 2319 info->fill_ratio_given = 0; 2320 info->fill_ratio_needed = 0; 2321 info->factor_mallocs = 0; 2322 PetscFunctionReturn(0); 2323 } 2324 2325 /* -------------------------------------------------------------------------------------------*/ 2326 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2327 { 2328 Mat_MUMPS *mumps = 
(Mat_MUMPS *)F->data; 2329 const PetscScalar *arr; 2330 const PetscInt *idxs; 2331 PetscInt size, i; 2332 2333 PetscFunctionBegin; 2334 PetscCall(ISGetLocalSize(is, &size)); 2335 /* Schur complement matrix */ 2336 PetscCall(MatDestroy(&F->schur)); 2337 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2338 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2339 mumps->id.schur = (MumpsScalar *)arr; 2340 mumps->id.size_schur = size; 2341 mumps->id.schur_lld = size; 2342 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2343 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2344 2345 /* MUMPS expects Fortran style indices */ 2346 PetscCall(PetscFree(mumps->id.listvar_schur)); 2347 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2348 PetscCall(ISGetIndices(is, &idxs)); 2349 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2350 PetscCall(ISRestoreIndices(is, &idxs)); 2351 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2352 mumps->id.ICNTL(26) = -1; 2353 PetscFunctionReturn(0); 2354 } 2355 2356 /* -------------------------------------------------------------------------------------------*/ 2357 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2358 { 2359 Mat St; 2360 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2361 PetscScalar *array; 2362 #if defined(PETSC_USE_COMPLEX) 2363 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2364 #endif 2365 2366 PetscFunctionBegin; 2367 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2368 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2369 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2370 PetscCall(MatSetType(St, MATDENSE)); 2371 PetscCall(MatSetUp(St)); 2372 PetscCall(MatDenseGetArray(St, &array)); 2373 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2374 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2375 PetscInt i, j, N = mumps->id.size_schur; 2376 for (i = 0; i < N; i++) { 2377 for (j = 0; j < N; j++) { 2378 #if !defined(PETSC_USE_COMPLEX) 2379 PetscScalar val = mumps->id.schur[i * N + j]; 2380 #else 2381 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2382 #endif 2383 array[j * N + i] = val; 2384 } 2385 } 2386 } else { /* stored by columns */ 2387 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2388 } 2389 } else { /* either full or lower-triangular (not packed) */ 2390 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2391 PetscInt i, j, N = mumps->id.size_schur; 2392 for (i = 0; i < N; i++) { 2393 for (j = i; j < N; j++) { 2394 #if !defined(PETSC_USE_COMPLEX) 2395 PetscScalar val = mumps->id.schur[i * N + j]; 2396 #else 2397 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2398 #endif 2399 array[i * N + j] = val; 2400 array[j * N + i] = val; 2401 } 2402 } 2403 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2404 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2405 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2406 PetscInt i, j, N = mumps->id.size_schur; 2407 for (i = 0; i < N; i++) { 2408 for (j = 0; j < i + 1; j++) { 2409 #if !defined(PETSC_USE_COMPLEX) 2410 PetscScalar val 
= mumps->id.schur[i * N + j]; 2411 #else 2412 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2413 #endif 2414 array[i * N + j] = val; 2415 array[j * N + i] = val; 2416 } 2417 } 2418 } 2419 } 2420 PetscCall(MatDenseRestoreArray(St, &array)); 2421 *S = St; 2422 PetscFunctionReturn(0); 2423 } 2424 2425 /* -------------------------------------------------------------------------------------------*/ 2426 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2427 { 2428 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2429 2430 PetscFunctionBegin; 2431 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2432 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2433 for (i = 0; i < nICNTL_pre; ++i) 2434 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2435 if (i == nICNTL_pre) { /* not already cached */ 2436 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2437 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2438 mumps->ICNTL_pre[0]++; 2439 } 2440 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2441 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2442 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2443 PetscFunctionReturn(0); 2444 } 2445 2446 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2447 { 2448 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2449 2450 PetscFunctionBegin; 2451 *ival = mumps->id.ICNTL(icntl); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 /*@ 2456 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2457 2458 Logically Collective 2459 2460 Input Parameters: 2461 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2462 . icntl - index of MUMPS parameter array ICNTL() 2463 - ival - value of MUMPS ICNTL(icntl) 2464 2465 Options Database Key: 2466 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2467 2468 Level: beginner 2469 2470 References: 2471 . * - MUMPS Users' Guide 2472 2473 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2474 @*/ 2475 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2476 { 2477 PetscFunctionBegin; 2478 PetscValidType(F, 1); 2479 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2480 PetscValidLogicalCollectiveInt(F, icntl, 2); 2481 PetscValidLogicalCollectiveInt(F, ival, 3); 2482 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2483 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2484 PetscFunctionReturn(0); 2485 } 2486 2487 /*@ 2488 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2489 2490 Logically Collective 2491 2492 Input Parameters: 2493 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2494 - icntl - index of MUMPS parameter array ICNTL() 2495 2496 Output Parameter: 2497 . ival - value of MUMPS ICNTL(icntl) 2498 2499 Level: beginner 2500 2501 References: 2502 . 
* - MUMPS Users' Guide 2503 2504 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2505 @*/ 2506 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2507 { 2508 PetscFunctionBegin; 2509 PetscValidType(F, 1); 2510 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2511 PetscValidLogicalCollectiveInt(F, icntl, 2); 2512 PetscValidIntPointer(ival, 3); 2513 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2514 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2515 PetscFunctionReturn(0); 2516 } 2517 2518 /* -------------------------------------------------------------------------------------------*/ 2519 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2520 { 2521 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2522 2523 PetscFunctionBegin; 2524 if (mumps->id.job == JOB_NULL) { 2525 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2526 for (i = 0; i < nCNTL_pre; ++i) 2527 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2528 if (i == nCNTL_pre) { 2529 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2530 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2531 mumps->CNTL_pre[0]++; 2532 } 2533 mumps->CNTL_pre[1 + 2 * i] = icntl; 2534 mumps->CNTL_pre[2 + 2 * i] = val; 2535 } else mumps->id.CNTL(icntl) = val; 2536 PetscFunctionReturn(0); 2537 } 2538 2539 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2540 { 2541 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2542 2543 PetscFunctionBegin; 2544 *val = mumps->id.CNTL(icntl); 2545 PetscFunctionReturn(0); 2546 } 2547 2548 /*@ 2549 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2550 2551 Logically Collective 2552 2553 Input Parameters: 2554 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2555 . icntl - index of MUMPS parameter array CNTL() 2556 - val - value of MUMPS CNTL(icntl) 2557 2558 Options Database Key: 2559 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2560 2561 Level: beginner 2562 2563 References: 2564 . 
* - MUMPS Users' Guide 2565 2566 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2567 @*/ 2568 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2569 { 2570 PetscFunctionBegin; 2571 PetscValidType(F, 1); 2572 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2573 PetscValidLogicalCollectiveInt(F, icntl, 2); 2574 PetscValidLogicalCollectiveReal(F, val, 3); 2575 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2576 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2577 PetscFunctionReturn(0); 2578 } 2579 2580 /*@ 2581 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2582 2583 Logically Collective 2584 2585 Input Parameters: 2586 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2587 - icntl - index of MUMPS parameter array CNTL() 2588 2589 Output Parameter: 2590 . val - value of MUMPS CNTL(icntl) 2591 2592 Level: beginner 2593 2594 References: 2595 . * - MUMPS Users' Guide 2596 2597 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2598 @*/ 2599 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2600 { 2601 PetscFunctionBegin; 2602 PetscValidType(F, 1); 2603 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2604 PetscValidLogicalCollectiveInt(F, icntl, 2); 2605 PetscValidRealPointer(val, 3); 2606 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2607 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2608 PetscFunctionReturn(0); 2609 } 2610 2611 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2612 { 2613 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2614 2615 PetscFunctionBegin; 2616 *info = mumps->id.INFO(icntl); 2617 PetscFunctionReturn(0); 2618 } 2619 2620 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2621 { 2622 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2623 2624 PetscFunctionBegin; 2625 *infog = mumps->id.INFOG(icntl); 2626 PetscFunctionReturn(0); 2627 } 2628 2629 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2630 { 2631 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2632 2633 PetscFunctionBegin; 2634 *rinfo = mumps->id.RINFO(icntl); 2635 PetscFunctionReturn(0); 2636 } 2637 2638 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2639 { 2640 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2641 2642 PetscFunctionBegin; 2643 *rinfog = mumps->id.RINFOG(icntl); 2644 PetscFunctionReturn(0); 2645 } 2646 2647 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2648 { 2649 Mat Bt = NULL, Btseq = NULL; 2650 PetscBool flg; 2651 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2652 PetscScalar *aa; 2653 PetscInt spnr, *ia, *ja, M, nrhs; 2654 2655 PetscFunctionBegin; 2656 PetscValidPointer(spRHS, 2); 2657 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2658 if (flg) { 2659 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2660 } else 
SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2661 2662 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2663 2664 if (mumps->petsc_size > 1) { 2665 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2666 Btseq = b->A; 2667 } else { 2668 Btseq = Bt; 2669 } 2670 2671 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2672 mumps->id.nrhs = nrhs; 2673 mumps->id.lrhs = M; 2674 mumps->id.rhs = NULL; 2675 2676 if (!mumps->myid) { 2677 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2678 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2679 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2680 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2681 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2682 } else { 2683 mumps->id.irhs_ptr = NULL; 2684 mumps->id.irhs_sparse = NULL; 2685 mumps->id.nz_rhs = 0; 2686 mumps->id.rhs_sparse = NULL; 2687 } 2688 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2689 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2690 2691 /* solve phase */ 2692 /*-------------*/ 2693 mumps->id.job = JOB_SOLVE; 2694 PetscMUMPS_c(mumps); 2695 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 2696 2697 if (!mumps->myid) { 2698 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2699 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2700 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2701 } 2702 PetscFunctionReturn(0); 2703 } 2704 2705 /*@ 2706 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2707 2708 Logically Collective 2709 2710 Input Parameters: 2711 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2712 - spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format holding specified indices in processor[0] 2713 2714 Output Parameter: 2715 . spRHS - requested entries of inverse of A 2716 2717 Level: beginner 2718 2719 References: 2720 . * - MUMPS Users' Guide 2721 2722 .seealso: `MatGetFactor()`, `MatCreateTranspose()` 2723 @*/ 2724 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2725 { 2726 PetscFunctionBegin; 2727 PetscValidType(F, 1); 2728 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2729 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2730 PetscFunctionReturn(0); 2731 } 2732 2733 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2734 { 2735 Mat spRHS; 2736 2737 PetscFunctionBegin; 2738 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 2739 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 2740 PetscCall(MatDestroy(&spRHS)); 2741 PetscFunctionReturn(0); 2742 } 2743 2744 /*@ 2745 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2746 2747 Logically Collective 2748 2749 Input Parameters: 2750 + F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2751 - spRHST - sequential sparse matrix in `MATAIJ` format holding specified indices of A^T in processor[0] 2752 2753 Output Parameter: 2754 . 
spRHST - requested entries of inverse of A^T 2755 2756 Level: beginner 2757 2758 References: 2759 . * - MUMPS Users' Guide 2760 2761 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2762 @*/ 2763 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 2764 { 2765 PetscBool flg; 2766 2767 PetscFunctionBegin; 2768 PetscValidType(F, 1); 2769 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2770 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 2771 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 2772 2773 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 2774 PetscFunctionReturn(0); 2775 } 2776 2777 /*@ 2778 MatMumpsGetInfo - Get MUMPS parameter INFO() 2779 2780 Logically Collective 2781 2782 Input Parameters: 2783 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2784 - icntl - index of MUMPS parameter array INFO() 2785 2786 Output Parameter: 2787 . ival - value of MUMPS INFO(icntl) 2788 2789 Level: beginner 2790 2791 References: 2792 . * - MUMPS Users' Guide 2793 2794 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2795 @*/ 2796 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 2797 { 2798 PetscFunctionBegin; 2799 PetscValidType(F, 1); 2800 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2801 PetscValidIntPointer(ival, 3); 2802 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2803 PetscFunctionReturn(0); 2804 } 2805 2806 /*@ 2807 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2808 2809 Logically Collective 2810 2811 Input Parameters: 2812 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2813 - icntl - index of MUMPS parameter array INFOG() 2814 2815 Output Parameter: 2816 . ival - value of MUMPS INFOG(icntl) 2817 2818 Level: beginner 2819 2820 References: 2821 . * - MUMPS Users' Guide 2822 2823 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2824 @*/ 2825 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 2826 { 2827 PetscFunctionBegin; 2828 PetscValidType(F, 1); 2829 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2830 PetscValidIntPointer(ival, 3); 2831 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 /*@ 2836 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2837 2838 Logically Collective 2839 2840 Input Parameters: 2841 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2842 - icntl - index of MUMPS parameter array RINFO() 2843 2844 Output Parameter: 2845 . val - value of MUMPS RINFO(icntl) 2846 2847 Level: beginner 2848 2849 References: 2850 . 
* - MUMPS Users' Guide 2851 2852 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 2853 @*/ 2854 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 2855 { 2856 PetscFunctionBegin; 2857 PetscValidType(F, 1); 2858 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2859 PetscValidRealPointer(val, 3); 2860 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2861 PetscFunctionReturn(0); 2862 } 2863 2864 /*@ 2865 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2866 2867 Logically Collective 2868 2869 Input Parameters: 2870 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2871 - icntl - index of MUMPS parameter array RINFOG() 2872 2873 Output Parameter: 2874 . val - value of MUMPS RINFOG(icntl) 2875 2876 Level: beginner 2877 2878 References: 2879 . * - MUMPS Users' Guide 2880 2881 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 2882 @*/ 2883 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 2884 { 2885 PetscFunctionBegin; 2886 PetscValidType(F, 1); 2887 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2888 PetscValidRealPointer(val, 3); 2889 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2890 PetscFunctionReturn(0); 2891 } 2892 2893 /*MC 2894 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2895 distributed and sequential matrices via the external package MUMPS. 2896 2897 Works with `MATAIJ` and `MATSBAIJ` matrices 2898 2899 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2900 2901 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2902 2903 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2904 2905 Options Database Keys: 2906 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2907 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2908 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2909 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2910 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2911 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2912 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2913 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2914 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2915 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2916 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2917 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2918 . 
-mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2919 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 2920 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2921 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense right-hand sides 2922 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2923 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor 2924 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2925 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2926 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 2927 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 2928 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2929 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2930 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2931 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2932 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2933 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2934 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2935 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2936 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2937 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2938 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2939 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2940 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2941 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS. 2942 The default is the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual. 2943 2944 Level: beginner 2945 2946 Notes: 2947 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc), so using it on a Hermitian matrix produces an error. 2948 2949 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 2950 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 2951 2952 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED` convergence reason, one can find the MUMPS information about the failure by calling 2953 $ KSPGetPC(ksp,&pc); 2954 $ PCFactorGetMatrix(pc,&mat); 2955 $ MatMumpsGetInfo(mat,....); 2956 $ MatMumpsGetInfog(mat,....); etc. 2957 Alternatively, run with -ksp_error_if_not_converged and the program will stop with the MUMPS information printed in the error message.
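As a minimal sketch of those calls (assuming ksp is the KSP used for the solve; variable declarations and the convergence-reason check are included to make the fragment self-contained, error checking is elided):
$    KSPConvergedReason reason;
$    PC                 pc;
$    Mat                fmat;
$    PetscInt           infog1, info2;
$    KSPGetConvergedReason(ksp, &reason);
$    if (reason == KSP_DIVERGED_PC_FAILED) {
$      KSPGetPC(ksp, &pc);
$      PCFactorGetMatrix(pc, &fmat);
$      MatMumpsGetInfog(fmat, 1, &infog1);
$      MatMumpsGetInfo(fmat, 2, &info2);
$    }
INFOG(1) and INFO(2) are the error codes MUMPS reports on failure; see the MUMPS Users' Guide for the meaning of particular values.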
2958 2959 Using MUMPS with 64-bit integers 2960 MUMPS provides 64-bit integer support in two build modes: 2961 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 2962 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 2963 2964 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 2965 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and 2966 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 2967 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 2968 2969 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 2970 2971 Two modes to run MUMPS/PETSc with OpenMP 2972 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2973 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2974 2975 $ Use -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2976 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2977 2978 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2979 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2980 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2981 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2982 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 2983 2984 If you run your code through a job submission system, there are caveats concerning MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2985 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2986 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm 2987 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might have been set 2988 by CPU binding policies in job scripts) and make those CPUs available to the master so that the OpenMP threads spawned by MUMPS can run on them. 2989 On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets: 2990 if you interleave MPI ranks on the two sockets (in other words, even ranks are placed on socket 0 and odd ranks on socket 1) and bind 2991 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the 2992 cores in socket 1, which definitely hurts locality.
On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2993 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2994 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2995 examine the mapping result. 2996 2997 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 2998 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 2999 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3000 3001 References: 3002 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3003 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 3004 3005 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3006 M*/ 3007 3008 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3009 { 3010 PetscFunctionBegin; 3011 *type = MATSOLVERMUMPS; 3012 PetscFunctionReturn(0); 3013 } 3014 3015 /* MatGetFactor for Seq and MPI AIJ matrices */ 3016 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3017 { 3018 Mat B; 3019 Mat_MUMPS *mumps; 3020 PetscBool isSeqAIJ; 3021 PetscMPIInt size; 3022 3023 PetscFunctionBegin; 3024 #if defined(PETSC_USE_COMPLEX) 3025 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3026 #endif 3027 /* Create the factorization matrix */ 3028 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3029 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3030 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3031 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3032 PetscCall(MatSetUp(B)); 3033 3034 PetscCall(PetscNew(&mumps)); 3035 3036 B->ops->view = MatView_MUMPS; 3037 B->ops->getinfo = MatGetInfo_MUMPS; 3038 3039 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3040 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3041 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3042 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3043 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3044 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3045 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3046 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3047 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3048 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3049 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3050 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3051 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3052 3053 if (ftype == MAT_FACTOR_LU) { 3054 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3055 B->factortype = MAT_FACTOR_LU; 3056 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3057 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3058 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3059 mumps->sym = 0; 3060 } else { 3061 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3062 B->factortype = MAT_FACTOR_CHOLESKY; 3063 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3064 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3065 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3066 #if defined(PETSC_USE_COMPLEX) 3067 mumps->sym = 2; 3068 #else 3069 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3070 else mumps->sym = 2; 3071 #endif 3072 } 3073 3074 /* set solvertype */ 3075 PetscCall(PetscFree(B->solvertype)); 3076 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3077 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3078 if (size == 1) { 3079 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3080 B->canuseordering = PETSC_TRUE; 3081 } 3082 B->ops->destroy = MatDestroy_MUMPS; 3083 B->data = (void *)mumps; 3084 3085 *F = B; 3086 mumps->id.job = JOB_NULL; 3087 mumps->ICNTL_pre = NULL; 3088 mumps->CNTL_pre = NULL; 3089 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3090 PetscFunctionReturn(0); 3091 } 3092 3093 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3094 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3095 { 3096 Mat B; 3097 Mat_MUMPS *mumps; 3098 PetscBool isSeqSBAIJ; 3099 PetscMPIInt size; 3100 3101 PetscFunctionBegin; 3102 #if defined(PETSC_USE_COMPLEX) 3103 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3104 #endif 3105 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3106 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3107 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3108 PetscCall(MatSetUp(B)); 3109 3110 PetscCall(PetscNew(&mumps)); 3111 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3112 if (isSeqSBAIJ) { 3113 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3114 } else { 3115 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3116 } 3117 3118 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3119 B->ops->view = MatView_MUMPS; 3120 B->ops->getinfo = MatGetInfo_MUMPS; 3121 3122 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3123 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3124 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3125 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3126 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3127 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3128 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3129 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3130 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3131 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3132 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3133 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3134 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3135 3136 B->factortype = MAT_FACTOR_CHOLESKY; 3137 #if defined(PETSC_USE_COMPLEX) 3138 mumps->sym = 2; 3139 #else 3140 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3141 else mumps->sym = 2; 3142 #endif 3143 3144 /* set solvertype */ 3145 PetscCall(PetscFree(B->solvertype)); 3146 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3147 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3148 if (size == 1) { 3149 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3150 B->canuseordering = PETSC_TRUE; 3151 } 3152 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3153 B->ops->destroy = MatDestroy_MUMPS; 3154 B->data = (void *)mumps; 3155 3156 *F = B; 3157 mumps->id.job = JOB_NULL; 3158 mumps->ICNTL_pre = NULL; 3159 mumps->CNTL_pre = NULL; 3160 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3161 PetscFunctionReturn(0); 3162 } 3163 3164 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3165 { 3166 Mat B; 3167 Mat_MUMPS *mumps; 3168 PetscBool isSeqBAIJ; 3169 PetscMPIInt size; 3170 3171 PetscFunctionBegin; 3172 /* Create the factorization matrix */ 3173 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3174 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3175 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3176 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3177 PetscCall(MatSetUp(B)); 3178 3179 PetscCall(PetscNew(&mumps)); 3180 if (ftype == MAT_FACTOR_LU) { 3181 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3182 B->factortype = MAT_FACTOR_LU; 3183 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3184 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3185 mumps->sym = 0; 3186 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3187 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3188 3189 B->ops->view = MatView_MUMPS; 3190 B->ops->getinfo = MatGetInfo_MUMPS; 3191 3192 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3193 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3195 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3196 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3197 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3198 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3199 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3200 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3201 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3202 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3203 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3204 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3205 3206 /* set solvertype */ 3207 PetscCall(PetscFree(B->solvertype)); 3208 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3209 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3210 if (size == 1) { 3211 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3212 B->canuseordering = PETSC_TRUE; 3213 } 3214 B->ops->destroy = MatDestroy_MUMPS; 3215 B->data = (void *)mumps; 3216 3217 *F = B; 3218 mumps->id.job = JOB_NULL; 3219 mumps->ICNTL_pre = NULL; 3220 mumps->CNTL_pre = NULL; 3221 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3222 PetscFunctionReturn(0); 3223 } 3224 3225 /* MatGetFactor for Seq and MPI SELL matrices */ 3226 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3227 { 3228 Mat B; 3229 Mat_MUMPS *mumps; 3230 PetscBool isSeqSELL; 3231 PetscMPIInt size; 3232 3233 PetscFunctionBegin; 3234 /* Create the factorization matrix */ 3235 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3236 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3237 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3238 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3239 PetscCall(MatSetUp(B)); 3240 3241 PetscCall(PetscNew(&mumps)); 3242 3243 B->ops->view = MatView_MUMPS; 3244 B->ops->getinfo = MatGetInfo_MUMPS; 3245 3246 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3247 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3248 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3249 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3250 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3251 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3252 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3253 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3254 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3255 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3256 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3257 3258 if (ftype == MAT_FACTOR_LU) { 3259 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3260 B->factortype = MAT_FACTOR_LU; 3261 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3262 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3263 mumps->sym = 0; 3264 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3265 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3266 3267 /* set solvertype */ 3268 PetscCall(PetscFree(B->solvertype)); 3269 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3270 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3271 if (size == 1) { 3272 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3273 B->canuseordering = PETSC_TRUE; 3274 } 3275 B->ops->destroy = MatDestroy_MUMPS; 3276 B->data = (void *)mumps; 3277 3278 *F = B; 3279 mumps->id.job = JOB_NULL; 3280 mumps->ICNTL_pre = NULL; 3281 mumps->CNTL_pre = NULL; 3282 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3283 PetscFunctionReturn(0); 3284 } 3285 3286 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3287 { 3288 PetscFunctionBegin; 3289 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3290 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3291 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3292 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3293 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3294 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3295 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3296 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3297 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3298 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3299 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3300 PetscFunctionReturn(0); 3301 } 3302
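/*
   Example (a sketch, not part of the compiled library): using the MUMPS factorizations
   registered above directly through MatGetFactor(), setting a few MUMPS control
   parameters, factoring, solving, and querying MUMPS output data.  The matrix A and
   the vectors b, x are placeholders assumed to be assembled AIJ/Vec objects created
   elsewhere; error handling beyond PetscCall() is elided.

     Mat           F;
     IS            rperm, cperm;
     MatFactorInfo info;
     PetscInt      infog1;

     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatMumpsSetIcntl(F, 24, 1));      // ICNTL(24): detect null pivot rows
     PetscCall(MatMumpsSetCntl(F, 3, 1e-12));    // CNTL(3): absolute pivoting threshold
     PetscCall(MatGetOrdering(A, MATORDERINGEXTERNAL, &rperm, &cperm));
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatLUFactorSymbolic(F, A, rperm, cperm, &info));
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatMumpsGetInfog(F, 1, &infog1)); // INFOG(1) < 0 indicates a MUMPS error
     PetscCall(MatSolve(F, b, x));
     PetscCall(ISDestroy(&rperm));
     PetscCall(ISDestroy(&cperm));
     PetscCall(MatDestroy(&F));
*/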