/*
   Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT -1
#define JOB_NULL 0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
   numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt, etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so this macro is the more reliable check */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;
  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks.
Before/after the call, we change/restore CPUs the master ranks can run on */ 93 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 94 #define PetscMUMPS_c(mumps) \ 95 do { \ 96 if (mumps->use_petsc_omp_support) { \ 97 if (mumps->is_omp_master) { \ 98 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 99 MUMPS_c(&mumps->id); \ 100 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 101 } \ 102 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 103 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 104 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 105 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 106 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 107 */ \ 108 PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 109 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \ 110 PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 111 } else { \ 112 MUMPS_c(&mumps->id); \ 113 } \ 114 } while (0) 115 #else 116 #define PetscMUMPS_c(mumps) \ 117 do { \ 118 MUMPS_c(&mumps->id); \ 119 } while (0) 120 #endif 121 122 /* declare MumpsScalar */ 123 #if defined(PETSC_USE_COMPLEX) 124 #if defined(PETSC_USE_REAL_SINGLE) 125 #define MumpsScalar mumps_complex 126 #else 127 #define MumpsScalar mumps_double_complex 128 #endif 129 #else 130 #define MumpsScalar PetscScalar 131 #endif 132 133 /* macros s.t. indices match MUMPS documentation */ 134 #define ICNTL(I) icntl[(I)-1] 135 #define CNTL(I) cntl[(I)-1] 136 #define INFOG(I) infog[(I)-1] 137 #define INFO(I) info[(I)-1] 138 #define RINFOG(I) rinfog[(I)-1] 139 #define RINFO(I) rinfo[(I)-1] 140 141 typedef struct Mat_MUMPS Mat_MUMPS; 142 struct Mat_MUMPS { 143 #if defined(PETSC_USE_COMPLEX) 144 #if defined(PETSC_USE_REAL_SINGLE) 145 CMUMPS_STRUC_C id; 146 #else 147 ZMUMPS_STRUC_C id; 148 #endif 149 #else 150 #if defined(PETSC_USE_REAL_SINGLE) 151 SMUMPS_STRUC_C id; 152 #else 153 DMUMPS_STRUC_C id; 154 #endif 155 #endif 156 157 MatStructure matstruc; 158 PetscMPIInt myid, petsc_size; 159 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 160 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 161 PetscInt64 nnz; /* number of nonzeros. 
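(nnz can exceed 2^31-1 even when every individual row/column index fits in 32 bits, hence the 64-bit type.)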
The type is called selective 64-bit in mumps */ 162 PetscMUMPSInt sym; 163 MPI_Comm mumps_comm; 164 PetscMUMPSInt *ICNTL_pre; 165 PetscReal *CNTL_pre; 166 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 167 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 168 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 169 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 170 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 171 PetscInt *rhs_nrow, max_nrhs; 172 PetscMPIInt *rhs_recvcounts, *rhs_disps; 173 PetscScalar *rhs_loc, *rhs_recvbuf; 174 #endif 175 Vec b_seq, x_seq; 176 PetscInt ninfo, *info; /* which INFO to display */ 177 PetscInt sizeredrhs; 178 PetscScalar *schur_sol; 179 PetscInt schur_sizesol; 180 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 181 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 182 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 183 184 /* stuff used by petsc/mumps OpenMP support*/ 185 PetscBool use_petsc_omp_support; 186 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 187 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 188 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 189 PetscMPIInt tag, omp_comm_size; 190 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 191 MPI_Request *reqs; 192 }; 193 194 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 195 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 196 */ 197 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 198 { 199 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
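With 1-based CSR storage, ia[0] = 1 and ia[nrow] = nnz + 1, so the number of entries is ia[nrow] - 1.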
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 200 201 PetscFunctionBegin; 202 #if defined(PETSC_USE_64BIT_INDICES) 203 { 204 PetscInt i; 205 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 206 PetscCall(PetscFree(mumps->ia_alloc)); 207 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 208 mumps->cur_ilen = nrow + 1; 209 } 210 if (nnz > mumps->cur_jlen) { 211 PetscCall(PetscFree(mumps->ja_alloc)); 212 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 213 mumps->cur_jlen = nnz; 214 } 215 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 216 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 217 *ia_mumps = mumps->ia_alloc; 218 *ja_mumps = mumps->ja_alloc; 219 } 220 #else 221 *ia_mumps = ia; 222 *ja_mumps = ja; 223 #endif 224 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 225 PetscFunctionReturn(PETSC_SUCCESS); 226 } 227 228 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 229 { 230 PetscFunctionBegin; 231 PetscCall(PetscFree(mumps->id.listvar_schur)); 232 PetscCall(PetscFree(mumps->id.redrhs)); 233 PetscCall(PetscFree(mumps->schur_sol)); 234 mumps->id.size_schur = 0; 235 mumps->id.schur_lld = 0; 236 mumps->id.ICNTL(19) = 0; 237 PetscFunctionReturn(PETSC_SUCCESS); 238 } 239 240 /* solve with rhs in mumps->id.redrhs and return in the same location */ 241 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 242 { 243 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 244 Mat S, B, X; 245 MatFactorSchurStatus schurstatus; 246 PetscInt sizesol; 247 248 PetscFunctionBegin; 249 PetscCall(MatFactorFactorizeSchurComplement(F)); 250 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 251 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 252 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 253 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 254 PetscCall(MatBindToCPU(B, S->boundtocpu)); 255 #endif 256 switch (schurstatus) { 257 case MAT_FACTOR_SCHUR_FACTORED: 258 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 259 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 260 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 261 PetscCall(MatBindToCPU(X, S->boundtocpu)); 262 #endif 263 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 264 PetscCall(MatMatSolveTranspose(S, B, X)); 265 } else { 266 PetscCall(MatMatSolve(S, B, X)); 267 } 268 break; 269 case MAT_FACTOR_SCHUR_INVERTED: 270 sizesol = mumps->id.nrhs * mumps->id.size_schur; 271 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 272 PetscCall(PetscFree(mumps->schur_sol)); 273 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 274 mumps->schur_sizesol = sizesol; 275 } 276 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 277 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 278 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 279 PetscCall(MatBindToCPU(X, S->boundtocpu)); 280 #endif 281 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 282 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 283 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 284 } else { 285 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 286 } 287 PetscCall(MatProductSetFromOptions(X)); 288 PetscCall(MatProductSymbolic(X)); 289 PetscCall(MatProductNumeric(X)); 290 291 
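/* copy the product back into B, which aliases mumps->id.redrhs */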
PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 292 break; 293 default: 294 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 295 } 296 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 297 PetscCall(MatDestroy(&B)); 298 PetscCall(MatDestroy(&X)); 299 PetscFunctionReturn(PETSC_SUCCESS); 300 } 301 302 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 303 { 304 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 305 306 PetscFunctionBegin; 307 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 308 PetscFunctionReturn(PETSC_SUCCESS); 309 } 310 if (!expansion) { /* prepare for the condensation step */ 311 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 312 /* allocate MUMPS internal array to store reduced right-hand sides */ 313 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 314 PetscCall(PetscFree(mumps->id.redrhs)); 315 mumps->id.lredrhs = mumps->id.size_schur; 316 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 317 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 318 } 319 mumps->id.ICNTL(26) = 1; /* condensation phase */ 320 } else { /* prepare for the expansion step */ 321 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 322 PetscCall(MatMumpsSolveSchur_Private(F)); 323 mumps->id.ICNTL(26) = 2; /* expansion phase */ 324 PetscMUMPS_c(mumps); 325 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 326 /* restore defaults */ 327 mumps->id.ICNTL(26) = -1; 328 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 329 if (mumps->id.nrhs > 1) { 330 PetscCall(PetscFree(mumps->id.redrhs)); 331 mumps->id.lredrhs = 0; 332 mumps->sizeredrhs = 0; 333 } 334 } 335 PetscFunctionReturn(PETSC_SUCCESS); 336 } 337 338 /* 339 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 340 341 input: 342 A - matrix in aij,baij or sbaij format 343 shift - 0: C style output triple; 1: Fortran style output triple. 344 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 345 MAT_REUSE_MATRIX: only the values in v array are updated 346 output: 347 nnz - dim of r, c, and v (number of local nonzero entries of A) 348 r, c, v - row and col index, matrix values (matrix triples) 349 350 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 351 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 352 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
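A small worked example of the triple format: with shift=1 (Fortran-style), the 2x2 matrix [a 0; b c] yields nnz = 3, r = {1, 2, 2}, c = {1, 1, 2}, v = {a, b, c}.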
353 354 */ 355 356 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 357 { 358 const PetscScalar *av; 359 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 360 PetscInt64 nz, rnz, i, j, k; 361 PetscMUMPSInt *row, *col; 362 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 363 364 PetscFunctionBegin; 365 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 366 mumps->val = (PetscScalar *)av; 367 if (reuse == MAT_INITIAL_MATRIX) { 368 nz = aa->nz; 369 ai = aa->i; 370 aj = aa->j; 371 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 372 for (i = k = 0; i < M; i++) { 373 rnz = ai[i + 1] - ai[i]; 374 ajj = aj + ai[i]; 375 for (j = 0; j < rnz; j++) { 376 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 377 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 378 k++; 379 } 380 } 381 mumps->irn = row; 382 mumps->jcn = col; 383 mumps->nnz = nz; 384 } 385 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 386 PetscFunctionReturn(PETSC_SUCCESS); 387 } 388 389 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 390 { 391 PetscInt64 nz, i, j, k, r; 392 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 393 PetscMUMPSInt *row, *col; 394 395 PetscFunctionBegin; 396 mumps->val = a->val; 397 if (reuse == MAT_INITIAL_MATRIX) { 398 nz = a->sliidx[a->totalslices]; 399 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 400 for (i = k = 0; i < a->totalslices; i++) { 401 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 402 } 403 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 404 mumps->irn = row; 405 mumps->jcn = col; 406 mumps->nnz = nz; 407 } 408 PetscFunctionReturn(PETSC_SUCCESS); 409 } 410 411 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 412 { 413 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 414 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 415 PetscInt64 M, nz, idx = 0, rnz, i, j, k, m; 416 PetscInt bs; 417 PetscMUMPSInt *row, *col; 418 419 PetscFunctionBegin; 420 PetscCall(MatGetBlockSize(A, &bs)); 421 M = A->rmap->N / bs; 422 mumps->val = aa->a; 423 if (reuse == MAT_INITIAL_MATRIX) { 424 ai = aa->i; 425 aj = aa->j; 426 nz = bs2 * aa->nz; 427 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 428 for (i = 0; i < M; i++) { 429 ajj = aj + ai[i]; 430 rnz = ai[i + 1] - ai[i]; 431 for (k = 0; k < rnz; k++) { 432 for (j = 0; j < bs; j++) { 433 for (m = 0; m < bs; m++) { 434 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 435 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 436 idx++; 437 } 438 } 439 } 440 } 441 mumps->irn = row; 442 mumps->jcn = col; 443 mumps->nnz = nz; 444 } 445 PetscFunctionReturn(PETSC_SUCCESS); 446 } 447 448 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 449 { 450 const PetscInt *ai, *aj, *ajj; 451 PetscInt bs; 452 PetscInt64 nz, rnz, i, j, k, m; 453 PetscMUMPSInt *row, *col; 454 PetscScalar *val; 455 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 456 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 457 #if defined(PETSC_USE_COMPLEX) 458 PetscBool isset, hermitian; 459 #endif 460 461 PetscFunctionBegin; 462 #if defined(PETSC_USE_COMPLEX) 463 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 464 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 465 #endif 466 ai 
= aa->i; 467 aj = aa->j; 468 PetscCall(MatGetBlockSize(A, &bs)); 469 if (reuse == MAT_INITIAL_MATRIX) { 470 const PetscInt64 alloc_size = aa->nz * bs2; 471 472 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 473 if (bs > 1) { 474 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 475 mumps->val = mumps->val_alloc; 476 } else { 477 mumps->val = aa->a; 478 } 479 mumps->irn = row; 480 mumps->jcn = col; 481 } else { 482 if (bs == 1) mumps->val = aa->a; 483 row = mumps->irn; 484 col = mumps->jcn; 485 } 486 val = mumps->val; 487 488 nz = 0; 489 if (bs > 1) { 490 for (i = 0; i < mbs; i++) { 491 rnz = ai[i + 1] - ai[i]; 492 ajj = aj + ai[i]; 493 for (j = 0; j < rnz; j++) { 494 for (k = 0; k < bs; k++) { 495 for (m = 0; m < bs; m++) { 496 if (ajj[j] > i || k >= m) { 497 if (reuse == MAT_INITIAL_MATRIX) { 498 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 499 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 500 } 501 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 502 } 503 } 504 } 505 } 506 } 507 } else if (reuse == MAT_INITIAL_MATRIX) { 508 for (i = 0; i < mbs; i++) { 509 rnz = ai[i + 1] - ai[i]; 510 ajj = aj + ai[i]; 511 for (j = 0; j < rnz; j++) { 512 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 513 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 514 nz++; 515 } 516 } 517 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 518 } 519 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 520 PetscFunctionReturn(PETSC_SUCCESS); 521 } 522 523 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 524 { 525 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 526 PetscInt64 nz, rnz, i, j; 527 const PetscScalar *av, *v1; 528 PetscScalar *val; 529 PetscMUMPSInt *row, *col; 530 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 531 PetscBool missing; 532 #if defined(PETSC_USE_COMPLEX) 533 PetscBool hermitian, isset; 534 #endif 535 536 PetscFunctionBegin; 537 #if defined(PETSC_USE_COMPLEX) 538 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 539 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 540 #endif 541 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 542 ai = aa->i; 543 aj = aa->j; 544 adiag = aa->diag; 545 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 546 if (reuse == MAT_INITIAL_MATRIX) { 547 /* count nz in the upper triangular part of A */ 548 nz = 0; 549 if (missing) { 550 for (i = 0; i < M; i++) { 551 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 552 for (j = ai[i]; j < ai[i + 1]; j++) { 553 if (aj[j] < i) continue; 554 nz++; 555 } 556 } else { 557 nz += ai[i + 1] - adiag[i]; 558 } 559 } 560 } else { 561 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 562 } 563 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 564 PetscCall(PetscMalloc1(nz, &val)); 565 mumps->nnz = nz; 566 mumps->irn = row; 567 mumps->jcn = col; 568 mumps->val = mumps->val_alloc = val; 569 570 nz = 0; 571 if (missing) { 572 for (i = 0; i < M; i++) { 573 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 574 for (j = ai[i]; j < ai[i + 1]; j++) { 575 if (aj[j] < i) continue; 576 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 577 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 578 val[nz] = av[j]; 579 nz++; 580 } 581 } else { 582 rnz = ai[i + 1] - adiag[i]; 583 ajj = aj + adiag[i]; 584 v1 = av + adiag[i]; 585 for (j = 0; j < rnz; 
j++) { 586 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 587 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 588 val[nz++] = v1[j]; 589 } 590 } 591 } 592 } else { 593 for (i = 0; i < M; i++) { 594 rnz = ai[i + 1] - adiag[i]; 595 ajj = aj + adiag[i]; 596 v1 = av + adiag[i]; 597 for (j = 0; j < rnz; j++) { 598 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 599 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 600 val[nz++] = v1[j]; 601 } 602 } 603 } 604 } else { 605 nz = 0; 606 val = mumps->val; 607 if (missing) { 608 for (i = 0; i < M; i++) { 609 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 610 for (j = ai[i]; j < ai[i + 1]; j++) { 611 if (aj[j] < i) continue; 612 val[nz++] = av[j]; 613 } 614 } else { 615 rnz = ai[i + 1] - adiag[i]; 616 v1 = av + adiag[i]; 617 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 618 } 619 } 620 } else { 621 for (i = 0; i < M; i++) { 622 rnz = ai[i + 1] - adiag[i]; 623 v1 = av + adiag[i]; 624 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 625 } 626 } 627 } 628 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 629 PetscFunctionReturn(PETSC_SUCCESS); 630 } 631 632 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 633 { 634 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 635 PetscInt bs; 636 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 637 PetscMUMPSInt *row, *col; 638 const PetscScalar *av, *bv, *v1, *v2; 639 PetscScalar *val; 640 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 641 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 642 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 643 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 644 #if defined(PETSC_USE_COMPLEX) 645 PetscBool hermitian, isset; 646 #endif 647 648 PetscFunctionBegin; 649 #if defined(PETSC_USE_COMPLEX) 650 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 651 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 652 #endif 653 PetscCall(MatGetBlockSize(A, &bs)); 654 rstart = A->rmap->rstart; 655 ai = aa->i; 656 aj = aa->j; 657 bi = bb->i; 658 bj = bb->j; 659 av = aa->a; 660 bv = bb->a; 661 662 garray = mat->garray; 663 664 if (reuse == MAT_INITIAL_MATRIX) { 665 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 666 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 667 PetscCall(PetscMalloc1(nz, &val)); 668 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 669 mumps->irn = row; 670 mumps->jcn = col; 671 mumps->val = mumps->val_alloc = val; 672 } else { 673 val = mumps->val; 674 } 675 676 jj = 0; 677 irow = rstart; 678 for (i = 0; i < mbs; i++) { 679 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 680 countA = ai[i + 1] - ai[i]; 681 countB = bi[i + 1] - bi[i]; 682 bjj = bj + bi[i]; 683 v1 = av + ai[i] * bs2; 684 v2 = bv + bi[i] * bs2; 685 686 if (bs > 1) { 687 /* A-part */ 688 for (j = 0; j < countA; j++) { 689 for (k = 0; k < bs; k++) { 690 for (m = 0; m < bs; m++) { 691 if (rstart + ajj[j] * bs > irow || k >= m) { 692 if (reuse == MAT_INITIAL_MATRIX) { 693 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 694 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 695 } 696 val[jj++] = v1[j * bs2 + m + k * bs]; 697 } 698 } 699 } 700 } 701 702 /* B-part */ 703 for (j = 0; j < countB; j++) { 704 for (k = 0; k < bs; k++) { 705 for (m = 0; m < bs; m++) { 706 if (reuse == MAT_INITIAL_MATRIX) { 707 PetscCall(PetscMUMPSIntCast(irow + m + shift, 
&row[jj])); 708 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 709 } 710 val[jj++] = v2[j * bs2 + m + k * bs]; 711 } 712 } 713 } 714 } else { 715 /* A-part */ 716 for (j = 0; j < countA; j++) { 717 if (reuse == MAT_INITIAL_MATRIX) { 718 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 719 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 720 } 721 val[jj++] = v1[j]; 722 } 723 724 /* B-part */ 725 for (j = 0; j < countB; j++) { 726 if (reuse == MAT_INITIAL_MATRIX) { 727 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 728 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 729 } 730 val[jj++] = v2[j]; 731 } 732 } 733 irow += bs; 734 } 735 mumps->nnz = jj; 736 PetscFunctionReturn(PETSC_SUCCESS); 737 } 738 739 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 740 { 741 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 742 PetscInt64 rstart, nz, i, j, jj, irow, countA, countB; 743 PetscMUMPSInt *row, *col; 744 const PetscScalar *av, *bv, *v1, *v2; 745 PetscScalar *val; 746 Mat Ad, Ao; 747 Mat_SeqAIJ *aa; 748 Mat_SeqAIJ *bb; 749 750 PetscFunctionBegin; 751 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 752 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 753 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 754 755 aa = (Mat_SeqAIJ *)(Ad)->data; 756 bb = (Mat_SeqAIJ *)(Ao)->data; 757 ai = aa->i; 758 aj = aa->j; 759 bi = bb->i; 760 bj = bb->j; 761 762 rstart = A->rmap->rstart; 763 764 if (reuse == MAT_INITIAL_MATRIX) { 765 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 766 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 767 PetscCall(PetscMalloc1(nz, &val)); 768 mumps->nnz = nz; 769 mumps->irn = row; 770 mumps->jcn = col; 771 mumps->val = mumps->val_alloc = val; 772 } else { 773 val = mumps->val; 774 } 775 776 jj = 0; 777 irow = rstart; 778 for (i = 0; i < m; i++) { 779 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 780 countA = ai[i + 1] - ai[i]; 781 countB = bi[i + 1] - bi[i]; 782 bjj = bj + bi[i]; 783 v1 = av + ai[i]; 784 v2 = bv + bi[i]; 785 786 /* A-part */ 787 for (j = 0; j < countA; j++) { 788 if (reuse == MAT_INITIAL_MATRIX) { 789 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 790 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 791 } 792 val[jj++] = v1[j]; 793 } 794 795 /* B-part */ 796 for (j = 0; j < countB; j++) { 797 if (reuse == MAT_INITIAL_MATRIX) { 798 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 799 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 800 } 801 val[jj++] = v2[j]; 802 } 803 irow++; 804 } 805 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 806 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 807 PetscFunctionReturn(PETSC_SUCCESS); 808 } 809 810 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 811 { 812 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 813 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 814 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 815 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 816 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart; 817 const PetscInt bs2 = mat->bs2; 818 PetscInt bs; 819 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 820 PetscMUMPSInt *row, *col; 821 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 822 PetscScalar *val; 823 824 PetscFunctionBegin; 825 PetscCall(MatGetBlockSize(A, 
&bs)); 826 if (reuse == MAT_INITIAL_MATRIX) { 827 nz = bs2 * (aa->nz + bb->nz); 828 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 829 PetscCall(PetscMalloc1(nz, &val)); 830 mumps->nnz = nz; 831 mumps->irn = row; 832 mumps->jcn = col; 833 mumps->val = mumps->val_alloc = val; 834 } else { 835 val = mumps->val; 836 } 837 838 jj = 0; 839 irow = rstart; 840 for (i = 0; i < mbs; i++) { 841 countA = ai[i + 1] - ai[i]; 842 countB = bi[i + 1] - bi[i]; 843 ajj = aj + ai[i]; 844 bjj = bj + bi[i]; 845 v1 = av + bs2 * ai[i]; 846 v2 = bv + bs2 * bi[i]; 847 848 idx = 0; 849 /* A-part */ 850 for (k = 0; k < countA; k++) { 851 for (j = 0; j < bs; j++) { 852 for (n = 0; n < bs; n++) { 853 if (reuse == MAT_INITIAL_MATRIX) { 854 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 855 PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj])); 856 } 857 val[jj++] = v1[idx++]; 858 } 859 } 860 } 861 862 idx = 0; 863 /* B-part */ 864 for (k = 0; k < countB; k++) { 865 for (j = 0; j < bs; j++) { 866 for (n = 0; n < bs; n++) { 867 if (reuse == MAT_INITIAL_MATRIX) { 868 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 869 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 870 } 871 val[jj++] = v2[idx++]; 872 } 873 } 874 } 875 irow += bs; 876 } 877 PetscFunctionReturn(PETSC_SUCCESS); 878 } 879 880 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 881 { 882 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 883 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 884 PetscMUMPSInt *row, *col; 885 const PetscScalar *av, *bv, *v1, *v2; 886 PetscScalar *val; 887 Mat Ad, Ao; 888 Mat_SeqAIJ *aa; 889 Mat_SeqAIJ *bb; 890 #if defined(PETSC_USE_COMPLEX) 891 PetscBool hermitian, isset; 892 #endif 893 894 PetscFunctionBegin; 895 #if defined(PETSC_USE_COMPLEX) 896 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 897 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 898 #endif 899 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 900 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 901 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 902 903 aa = (Mat_SeqAIJ *)(Ad)->data; 904 bb = (Mat_SeqAIJ *)(Ao)->data; 905 ai = aa->i; 906 aj = aa->j; 907 adiag = aa->diag; 908 bi = bb->i; 909 bj = bb->j; 910 911 rstart = A->rmap->rstart; 912 913 if (reuse == MAT_INITIAL_MATRIX) { 914 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 915 nzb = 0; /* num of upper triangular entries in mat->B */ 916 for (i = 0; i < m; i++) { 917 nza += (ai[i + 1] - adiag[i]); 918 countB = bi[i + 1] - bi[i]; 919 bjj = bj + bi[i]; 920 for (j = 0; j < countB; j++) { 921 if (garray[bjj[j]] > rstart) nzb++; 922 } 923 } 924 925 nz = nza + nzb; /* total nz of upper triangular part of mat */ 926 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 927 PetscCall(PetscMalloc1(nz, &val)); 928 mumps->nnz = nz; 929 mumps->irn = row; 930 mumps->jcn = col; 931 mumps->val = mumps->val_alloc = val; 932 } else { 933 val = mumps->val; 934 } 935 936 jj = 0; 937 irow = rstart; 938 for (i = 0; i < m; i++) { 939 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 940 v1 = av + adiag[i]; 941 countA = ai[i + 1] - adiag[i]; 942 countB = bi[i + 1] - bi[i]; 943 bjj = bj + bi[i]; 944 v2 = bv + bi[i]; 945 946 /* A-part */ 947 for (j = 0; j < countA; j++) { 948 if (reuse == MAT_INITIAL_MATRIX) { 949 
PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 950 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 951 } 952 val[jj++] = v1[j]; 953 } 954 955 /* B-part */ 956 for (j = 0; j < countB; j++) { 957 if (garray[bjj[j]] > rstart) { 958 if (reuse == MAT_INITIAL_MATRIX) { 959 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 960 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 961 } 962 val[jj++] = v2[j]; 963 } 964 } 965 irow++; 966 } 967 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 968 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 969 PetscFunctionReturn(PETSC_SUCCESS); 970 } 971 972 PetscErrorCode MatDestroy_MUMPS(Mat A) 973 { 974 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 975 976 PetscFunctionBegin; 977 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 978 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 979 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 980 PetscCall(VecDestroy(&mumps->b_seq)); 981 PetscCall(VecDestroy(&mumps->x_seq)); 982 PetscCall(PetscFree(mumps->id.perm_in)); 983 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 984 PetscCall(PetscFree(mumps->val_alloc)); 985 PetscCall(PetscFree(mumps->info)); 986 PetscCall(PetscFree(mumps->ICNTL_pre)); 987 PetscCall(PetscFree(mumps->CNTL_pre)); 988 PetscCall(MatMumpsResetSchur_Private(mumps)); 989 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 990 mumps->id.job = JOB_END; 991 PetscMUMPS_c(mumps); 992 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 993 if (mumps->mumps_comm != MPI_COMM_NULL) { 994 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 995 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 996 } 997 } 998 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 999 if (mumps->use_petsc_omp_support) { 1000 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1001 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1002 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1003 } 1004 #endif 1005 PetscCall(PetscFree(mumps->ia_alloc)); 1006 PetscCall(PetscFree(mumps->ja_alloc)); 1007 PetscCall(PetscFree(mumps->recvcount)); 1008 PetscCall(PetscFree(mumps->reqs)); 1009 PetscCall(PetscFree(mumps->irhs_loc)); 1010 PetscCall(PetscFree(A->data)); 1011 1012 /* clear composed functions */ 1013 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1014 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1015 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1016 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1017 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1018 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1019 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1020 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1021 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1022 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1023 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1024 
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1025 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1026 PetscFunctionReturn(PETSC_SUCCESS); 1027 } 1028 1029 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1030 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1031 { 1032 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1033 const PetscMPIInt ompsize = mumps->omp_comm_size; 1034 PetscInt i, m, M, rstart; 1035 1036 PetscFunctionBegin; 1037 PetscCall(MatGetSize(A, &M, NULL)); 1038 PetscCall(MatGetLocalSize(A, &m, NULL)); 1039 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1040 if (ompsize == 1) { 1041 if (!mumps->irhs_loc) { 1042 mumps->nloc_rhs = m; 1043 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1044 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1045 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1046 } 1047 mumps->id.rhs_loc = (MumpsScalar *)array; 1048 } else { 1049 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1050 const PetscInt *ranges; 1051 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1052 MPI_Group petsc_group, omp_group; 1053 PetscScalar *recvbuf = NULL; 1054 1055 if (mumps->is_omp_master) { 1056 /* Lazily initialize the omp stuff for distributed rhs */ 1057 if (!mumps->irhs_loc) { 1058 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1059 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1060 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1061 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1062 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1063 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1064 1065 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1066 mumps->nloc_rhs = 0; 1067 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1068 for (j = 0; j < ompsize; j++) { 1069 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1070 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1071 } 1072 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1073 for (j = k = 0; j < ompsize; j++) { 1074 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1075 } 1076 1077 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1078 PetscCallMPI(MPI_Group_free(&petsc_group)); 1079 PetscCallMPI(MPI_Group_free(&omp_group)); 1080 } 1081 1082 /* Realloc buffers when current nrhs is bigger than what we have met */ 1083 if (nrhs > mumps->max_nrhs) { 1084 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1085 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1086 mumps->max_nrhs = nrhs; 1087 } 1088 1089 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1090 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1091 mumps->rhs_disps[0] = 0; 1092 for (j = 1; j < ompsize; j++) { 1093 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1094 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1095 } 1096 recvbuf = 
(nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1097 } 1098 1099 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1100 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1101 1102 if (mumps->is_omp_master) { 1103 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1104 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1105 for (j = 0; j < ompsize; j++) { 1106 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1107 dst = dstbase; 1108 for (i = 0; i < nrhs; i++) { 1109 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1110 src += mumps->rhs_nrow[j]; 1111 dst += mumps->nloc_rhs; 1112 } 1113 dstbase += mumps->rhs_nrow[j]; 1114 } 1115 } 1116 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1117 } 1118 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1119 } 1120 mumps->id.nrhs = nrhs; 1121 mumps->id.nloc_rhs = mumps->nloc_rhs; 1122 mumps->id.lrhs_loc = mumps->nloc_rhs; 1123 mumps->id.irhs_loc = mumps->irhs_loc; 1124 PetscFunctionReturn(PETSC_SUCCESS); 1125 } 1126 1127 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1128 { 1129 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1130 const PetscScalar *rarray = NULL; 1131 PetscScalar *array; 1132 IS is_iden, is_petsc; 1133 PetscInt i; 1134 PetscBool second_solve = PETSC_FALSE; 1135 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1136 1137 PetscFunctionBegin; 1138 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1139 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1140 &cite1)); 1141 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1142 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1143 &cite2)); 1144 1145 if (A->factorerrortype) { 1146 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1147 PetscCall(VecSetInf(x)); 1148 PetscFunctionReturn(PETSC_SUCCESS); 1149 } 1150 1151 mumps->id.nrhs = 1; 1152 if (mumps->petsc_size > 1) { 1153 if (mumps->ICNTL20 == 10) { 1154 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1155 PetscCall(VecGetArrayRead(b, &rarray)); 1156 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1157 } else { 1158 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1159 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1160 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1161 if (!mumps->myid) { 1162 PetscCall(VecGetArray(mumps->b_seq, &array)); 1163 mumps->id.rhs = (MumpsScalar *)array; 1164 } 1165 } 1166 } else { /* petsc_size == 1 */ 1167 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1168 PetscCall(VecCopy(b, x)); 1169 PetscCall(VecGetArray(x, &array)); 1170 mumps->id.rhs = (MumpsScalar *)array; 1171 } 1172 1173 /* 1174 handle condensation step of Schur complement (if any) 1175 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1176 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1177 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1178 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1179 */ 1180 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1181 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1182 second_solve = PETSC_TRUE; 1183 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1184 } 1185 /* solve phase */ 1186 mumps->id.job = JOB_SOLVE; 1187 PetscMUMPS_c(mumps); 1188 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1189 1190 /* handle expansion step of Schur complement (if any) */ 1191 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1192 1193 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1194 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1195 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1196 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1197 } 1198 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1199 PetscInt *isol2_loc = NULL; 1200 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1201 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1202 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1203 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1204 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1205 PetscCall(ISDestroy(&is_iden)); 1206 
PetscCall(ISDestroy(&is_petsc)); 1207 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1208 } 1209 1210 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1211 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1212 } 1213 1214 if (mumps->petsc_size > 1) { 1215 if (mumps->ICNTL20 == 10) { 1216 PetscCall(VecRestoreArrayRead(b, &rarray)); 1217 } else if (!mumps->myid) { 1218 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1219 } 1220 } else PetscCall(VecRestoreArray(x, &array)); 1221 1222 PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3))); 1223 PetscFunctionReturn(PETSC_SUCCESS); 1224 } 1225 1226 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1227 { 1228 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1229 1230 PetscFunctionBegin; 1231 mumps->id.ICNTL(9) = 0; 1232 PetscCall(MatSolve_MUMPS(A, b, x)); 1233 mumps->id.ICNTL(9) = 1; 1234 PetscFunctionReturn(PETSC_SUCCESS); 1235 } 1236 1237 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1238 { 1239 Mat Bt = NULL; 1240 PetscBool denseX, denseB, flg, flgT; 1241 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1242 PetscInt i, nrhs, M; 1243 PetscScalar *array; 1244 const PetscScalar *rbray; 1245 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1246 PetscMUMPSInt *isol_loc, *isol_loc_save; 1247 PetscScalar *bray, *sol_loc, *sol_loc_save; 1248 IS is_to, is_from; 1249 PetscInt k, proc, j, m, myrstart; 1250 const PetscInt *rstart; 1251 Vec v_mpi, msol_loc; 1252 VecScatter scat_sol; 1253 Vec b_seq; 1254 VecScatter scat_rhs; 1255 PetscScalar *aa; 1256 PetscInt spnr, *ia, *ja; 1257 Mat_MPIAIJ *b = NULL; 1258 1259 PetscFunctionBegin; 1260 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1261 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1262 1263 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1264 if (denseB) { 1265 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1266 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1267 } else { /* sparse B */ 1268 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1269 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1270 if (flgT) { /* input B is transpose of actual RHS matrix, 1271 because mumps requires sparse compressed COLUMN storage! 
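(The CSR arrays of the stored matrix serve as CSC arrays of its transpose, which is the column-oriented layout MUMPS expects for a sparse RHS.)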
See MatMatTransposeSolve_MUMPS() */ 1272 PetscCall(MatTransposeGetMat(B, &Bt)); 1273 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1274 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1275 } 1276 1277 PetscCall(MatGetSize(B, &M, &nrhs)); 1278 mumps->id.nrhs = nrhs; 1279 mumps->id.lrhs = M; 1280 mumps->id.rhs = NULL; 1281 1282 if (mumps->petsc_size == 1) { 1283 PetscScalar *aa; 1284 PetscInt spnr, *ia, *ja; 1285 PetscBool second_solve = PETSC_FALSE; 1286 1287 PetscCall(MatDenseGetArray(X, &array)); 1288 mumps->id.rhs = (MumpsScalar *)array; 1289 1290 if (denseB) { 1291 /* copy B to X */ 1292 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1293 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1294 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1295 } else { /* sparse B */ 1296 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1297 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1298 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1299 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1300 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1301 } 1302 /* handle condensation step of Schur complement (if any) */ 1303 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1304 second_solve = PETSC_TRUE; 1305 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1306 } 1307 /* solve phase */ 1308 mumps->id.job = JOB_SOLVE; 1309 PetscMUMPS_c(mumps); 1310 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1311 1312 /* handle expansion step of Schur complement (if any) */ 1313 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1314 if (!denseB) { /* sparse B */ 1315 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1316 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1317 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1318 } 1319 PetscCall(MatDenseRestoreArray(X, &array)); 1320 PetscFunctionReturn(PETSC_SUCCESS); 1321 } 1322 1323 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
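(This applies to the centralized paths below; with ICNTL(20)=10 the dense RHS remains distributed.)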
*/ 1324 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1325 1326 /* create msol_loc to hold mumps local solution */ 1327 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1328 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1329 1330 lsol_loc = mumps->id.lsol_loc; 1331 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1332 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1333 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1334 mumps->id.isol_loc = isol_loc; 1335 1336 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1337 1338 if (denseB) { 1339 if (mumps->ICNTL20 == 10) { 1340 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1341 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1342 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1343 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1344 PetscCall(MatGetLocalSize(B, &m, NULL)); 1345 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1346 } else { 1347 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1348 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1349 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1350 0, re-arrange B into desired order, which is a local operation. 1351 */ 1352 1353 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1354 /* wrap dense rhs matrix B into a vector v_mpi */ 1355 PetscCall(MatGetLocalSize(B, &m, NULL)); 1356 PetscCall(MatDenseGetArray(B, &bray)); 1357 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1358 PetscCall(MatDenseRestoreArray(B, &bray)); 1359 1360 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
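The gather below lays b_seq out column by column (all M rows of column 0, then column 1, ...), which is the dense column-major layout MUMPS expects for id.rhs with leading dimension lrhs = M.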
*/ 1361 if (!mumps->myid) { 1362 PetscInt *idx; 1363 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1364 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1365 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1366 k = 0; 1367 for (proc = 0; proc < mumps->petsc_size; proc++) { 1368 for (j = 0; j < nrhs; j++) { 1369 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1370 } 1371 } 1372 1373 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1374 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1375 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1376 } else { 1377 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1378 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1379 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1380 } 1381 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1382 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1383 PetscCall(ISDestroy(&is_to)); 1384 PetscCall(ISDestroy(&is_from)); 1385 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1386 1387 if (!mumps->myid) { /* define rhs on the host */ 1388 PetscCall(VecGetArray(b_seq, &bray)); 1389 mumps->id.rhs = (MumpsScalar *)bray; 1390 PetscCall(VecRestoreArray(b_seq, &bray)); 1391 } 1392 } 1393 } else { /* sparse B */ 1394 b = (Mat_MPIAIJ *)Bt->data; 1395 1396 /* wrap dense X into a vector v_mpi */ 1397 PetscCall(MatGetLocalSize(X, &m, NULL)); 1398 PetscCall(MatDenseGetArray(X, &bray)); 1399 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1400 PetscCall(MatDenseRestoreArray(X, &bray)); 1401 1402 if (!mumps->myid) { 1403 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1404 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1405 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1406 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1407 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1408 } else { 1409 mumps->id.irhs_ptr = NULL; 1410 mumps->id.irhs_sparse = NULL; 1411 mumps->id.nz_rhs = 0; 1412 mumps->id.rhs_sparse = NULL; 1413 } 1414 } 1415 1416 /* solve phase */ 1417 mumps->id.job = JOB_SOLVE; 1418 PetscMUMPS_c(mumps); 1419 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1420 1421 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1422 PetscCall(MatDenseGetArray(X, &array)); 1423 PetscCall(VecPlaceArray(v_mpi, array)); 1424 1425 /* create scatter scat_sol */ 1426 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1427 /* iidx: index for scatter mumps solution to petsc X */ 1428 1429 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1430 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1431 for (i = 0; i < lsol_loc; i++) { 1432 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1433 1434 for (proc = 0; proc < mumps->petsc_size; proc++) { 1435 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1436 myrstart = rstart[proc]; 1437 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1438 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1439 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1440 break; 1441 } 1442 } 1443 1444 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1445 } 1446 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1447 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1448 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1449 PetscCall(ISDestroy(&is_from)); 1450 PetscCall(ISDestroy(&is_to)); 1451 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1452 PetscCall(MatDenseRestoreArray(X, &array)); 1453 1454 /* free spaces */ 1455 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1456 mumps->id.isol_loc = isol_loc_save; 1457 1458 PetscCall(PetscFree2(sol_loc, isol_loc)); 1459 PetscCall(PetscFree(idxx)); 1460 PetscCall(VecDestroy(&msol_loc)); 1461 PetscCall(VecDestroy(&v_mpi)); 1462 if (!denseB) { 1463 if (!mumps->myid) { 1464 b = (Mat_MPIAIJ *)Bt->data; 1465 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1466 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1467 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1468 } 1469 } else { 1470 if (mumps->ICNTL20 == 0) { 1471 PetscCall(VecDestroy(&b_seq)); 1472 PetscCall(VecScatterDestroy(&scat_rhs)); 1473 } 1474 } 1475 PetscCall(VecScatterDestroy(&scat_sol)); 1476 PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3))); 1477 PetscFunctionReturn(PETSC_SUCCESS); 1478 } 1479 1480 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1481 { 1482 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1483 PetscMUMPSInt oldvalue = mumps->id.ICNTL(9); 1484 1485 PetscFunctionBegin; 1486 mumps->id.ICNTL(9) = 0; 1487 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1488 mumps->id.ICNTL(9) = oldvalue; 1489 PetscFunctionReturn(PETSC_SUCCESS); 1490 } 1491 1492 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1493 { 1494 PetscBool flg; 1495 Mat B; 1496 1497 PetscFunctionBegin; 1498 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1499 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1500 1501 /* Create B=Bt^T that uses Bt's data structure */ 1502 PetscCall(MatCreateTranspose(Bt, &B)); 1503 1504 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1505 PetscCall(MatDestroy(&B)); 1506 PetscFunctionReturn(PETSC_SUCCESS); 1507 } 1508 1509 #if !defined(PETSC_USE_COMPLEX) 1510 /* 1511 input: 1512 F: numeric factor 1513 output: 1514 nneg: total number of negative pivots 1515 nzero: total number of zero pivots 1516 npos: (global dimension of F) - nneg - nzero 1517 */ 1518 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1519 { 1520 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1521 PetscMPIInt size; 1522 1523 PetscFunctionBegin; 1524 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1525 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility 
to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1526 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1527 1528 if (nneg) *nneg = mumps->id.INFOG(12); 1529 if (nzero || npos) { 1530 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1531 if (nzero) *nzero = mumps->id.INFOG(28); 1532 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1533 } 1534 PetscFunctionReturn(PETSC_SUCCESS); 1535 } 1536 #endif 1537 1538 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1539 { 1540 PetscInt i, nreqs; 1541 PetscMUMPSInt *irn, *jcn; 1542 PetscMPIInt count; 1543 PetscInt64 totnnz, remain; 1544 const PetscInt osize = mumps->omp_comm_size; 1545 PetscScalar *val; 1546 1547 PetscFunctionBegin; 1548 if (osize > 1) { 1549 if (reuse == MAT_INITIAL_MATRIX) { 1550 /* master first gathers counts of nonzeros to receive */ 1551 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1552 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1553 1554 /* Then each computes number of send/recvs */ 1555 if (mumps->is_omp_master) { 1556 /* Start from 1 since self communication is not done in MPI */ 1557 nreqs = 0; 1558 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1559 } else { 1560 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1561 } 1562 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1563 1564 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1565 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1566 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1567 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
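     As a purely hypothetical illustration of the slicing: if one rank held nnz = 5,000,000,000 triplets,
     the sends below would be posted as three messages per array (irn, jcn and val) with counts 2147483647,
     2147483647 and 705032706, and the master posts matching receives in the same order, so sharing a single
     tag is safe.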
1568 */ 1569 nreqs = 0; /* counter for actual send/recvs */ 1570 if (mumps->is_omp_master) { 1571 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1572 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1573 PetscCall(PetscMalloc1(totnnz, &val)); 1574 1575 /* Self communication */ 1576 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1577 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1578 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1579 1580 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1581 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1582 PetscCall(PetscFree(mumps->val_alloc)); 1583 mumps->nnz = totnnz; 1584 mumps->irn = irn; 1585 mumps->jcn = jcn; 1586 mumps->val = mumps->val_alloc = val; 1587 1588 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1589 jcn += mumps->recvcount[0]; 1590 val += mumps->recvcount[0]; 1591 1592 /* Remote communication */ 1593 for (i = 1; i < osize; i++) { 1594 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1595 remain = mumps->recvcount[i] - count; 1596 while (count > 0) { 1597 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1598 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1599 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1600 irn += count; 1601 jcn += count; 1602 val += count; 1603 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1604 remain -= count; 1605 } 1606 } 1607 } else { 1608 irn = mumps->irn; 1609 jcn = mumps->jcn; 1610 val = mumps->val; 1611 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1612 remain = mumps->nnz - count; 1613 while (count > 0) { 1614 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1615 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1616 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1617 irn += count; 1618 jcn += count; 1619 val += count; 1620 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1621 remain -= count; 1622 } 1623 } 1624 } else { 1625 nreqs = 0; 1626 if (mumps->is_omp_master) { 1627 val = mumps->val + mumps->recvcount[0]; 1628 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1629 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1630 remain = mumps->recvcount[i] - count; 1631 while (count > 0) { 1632 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1633 val += count; 1634 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1635 remain -= count; 1636 } 1637 } 1638 } else { 1639 val = mumps->val; 1640 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1641 remain = mumps->nnz - count; 1642 while (count > 0) { 1643 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1644 val += count; 1645 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1646 remain -= count; 1647 } 1648 } 1649 } 1650 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1651 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1652 } 1653 PetscFunctionReturn(PETSC_SUCCESS); 1654 } 1655 1656 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const 
MatFactorInfo *info)
1657 {
1658   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1659   PetscBool  isMPIAIJ;
1660
1661   PetscFunctionBegin;
1662   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
1663     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1664     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1665     PetscFunctionReturn(PETSC_SUCCESS);
1666   }
1667
1668   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1669   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1670
1671   /* numerical factorization phase */
1672   mumps->id.job = JOB_FACTNUMERIC;
1673   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1674     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1675   } else {
1676     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1677   }
1678   PetscMUMPS_c(mumps);
1679   if (mumps->id.INFOG(1) < 0) {
1680     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1681     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1682       PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1683       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1684     } else if (mumps->id.INFOG(1) == -13) {
1685       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1686       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1687     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1688       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1689       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1690     } else {
1691       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1692       F->factorerrortype = MAT_FACTOR_OTHER;
1693     }
1694   }
1695   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " mumps->id.ICNTL(16):=%d", mumps->id.INFOG(16));
1696
1697   F->assembled = PETSC_TRUE;
1698
1699   if (F->schur) { /* reset Schur status to unfactored */
1700 #if defined(PETSC_HAVE_CUDA)
1701     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1702 #endif
1703     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1704       mumps->id.ICNTL(19) = 2;
1705       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1706     }
1707     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1708   }
1709
1710   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
1711   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1712
1713   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1714   if (mumps->petsc_size > 1) {
1715     PetscInt     lsol_loc;
1716     PetscScalar *sol_loc;
1717
1718     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1719
1720     /* distributed solution; Create x_seq=sol_loc for repeated use */
1721     if (mumps->x_seq) {
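      /* a previous numeric factorization may have left a solution scatter and local solution buffer behind;
         release them here so they can be recreated below with the new INFO(23) length */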
PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1723 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1724 PetscCall(VecDestroy(&mumps->x_seq)); 1725 } 1726 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1727 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1728 mumps->id.lsol_loc = lsol_loc; 1729 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1730 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1731 } 1732 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1733 PetscFunctionReturn(PETSC_SUCCESS); 1734 } 1735 1736 /* Sets MUMPS options from the options database */ 1737 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1738 { 1739 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1740 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1741 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1742 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1743 MumpsScalar *arr; 1744 1745 PetscFunctionBegin; 1746 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1747 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1748 PetscInt nthreads = 0; 1749 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1750 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1751 1752 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1753 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1754 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1755 1756 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1757 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1758 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1759 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1760 if (mumps->use_petsc_omp_support) { 1761 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 1762 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1763 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1764 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1765 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1766 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1767 #endif 1768 } else { 1769 mumps->omp_comm = PETSC_COMM_SELF; 1770 mumps->mumps_comm = mumps->petsc_comm; 1771 mumps->is_omp_master = PETSC_TRUE; 1772 } 1773 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1774 mumps->reqs = NULL; 1775 mumps->tag = 0; 1776 1777 if (mumps->mumps_comm != MPI_COMM_NULL) { 1778 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1779 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
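           A duplicated communicator carries its own communication context, so tags used internally by MUMPS
           cannot be matched against messages PETSc has outstanding on the original communicator.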
*/ 1780 MPI_Comm comm; 1781 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 1782 mumps->mumps_comm = comm; 1783 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 1784 } 1785 1786 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1787 mumps->id.job = JOB_INIT; 1788 mumps->id.par = 1; /* host participates factorizaton and solve */ 1789 mumps->id.sym = mumps->sym; 1790 1791 size = mumps->id.size_schur; 1792 arr = mumps->id.schur; 1793 listvar_schur = mumps->id.listvar_schur; 1794 PetscMUMPS_c(mumps); 1795 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1796 /* restore cached ICNTL and CNTL values */ 1797 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 1798 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 1799 PetscCall(PetscFree(mumps->ICNTL_pre)); 1800 PetscCall(PetscFree(mumps->CNTL_pre)); 1801 1802 if (schur) { 1803 mumps->id.size_schur = size; 1804 mumps->id.schur_lld = size; 1805 mumps->id.schur = arr; 1806 mumps->id.listvar_schur = listvar_schur; 1807 if (mumps->petsc_size > 1) { 1808 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 1809 1810 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1811 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1812 PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 1813 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 1814 } else { 1815 if (F->factortype == MAT_FACTOR_LU) { 1816 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1817 } else { 1818 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1819 } 1820 } 1821 mumps->id.ICNTL(26) = -1; 1822 } 1823 1824 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1825 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
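     Only the first 40 ICNTL entries and 15 CNTL entries are broadcast below; that already covers every
     control parameter this interface reads (up to ICNTL(38) and CNTL(7)).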
1826 */ 1827 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 1828 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 1829 1830 mumps->scat_rhs = NULL; 1831 mumps->scat_sol = NULL; 1832 1833 /* set PETSc-MUMPS default options - override MUMPS default */ 1834 mumps->id.ICNTL(3) = 0; 1835 mumps->id.ICNTL(4) = 0; 1836 if (mumps->petsc_size == 1) { 1837 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1838 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1839 } else { 1840 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1841 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1842 } 1843 } 1844 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 1845 if (flg) mumps->id.ICNTL(1) = icntl; 1846 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 1847 if (flg) mumps->id.ICNTL(2) = icntl; 1848 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 1849 if (flg) mumps->id.ICNTL(3) = icntl; 1850 1851 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 1852 if (flg) mumps->id.ICNTL(4) = icntl; 1853 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1854 1855 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 1856 if (flg) mumps->id.ICNTL(6) = icntl; 1857 1858 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 1859 if (flg) { 1860 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1861 mumps->id.ICNTL(7) = icntl; 1862 } 1863 1864 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 1865 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 1866 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 1867 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 1868 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 1869 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 1870 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 1871 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 1872 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 1873 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 1874 if (flg) { 1875 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 1876 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 1877 } 1878 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 1879 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1880 PetscCall(MatDestroy(&F->schur)); 1881 PetscCall(MatMumpsResetSchur_Private(mumps)); 1882 } 1883 1884 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1885 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1886 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1887 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1888 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1889 In short, we could not use distributed RHS with MPICH until v4.0b1. 
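     With an affected MPI build, the centralized path can also be forced explicitly at run time (for example
     -mat_mumps_icntl_20 0); the version guard below merely picks that fallback automatically for old MPICH.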
1890   */
1891 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1892   mumps->ICNTL20 = 0; /* Centralized dense RHS */
1893 #else
1894   mumps->ICNTL20 = 10; /* Distributed dense RHS */
1895 #endif
1896   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give MUMPS centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
1897   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
1898 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1899   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1900 #endif
1901   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1902
1903   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
1904   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
1905   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
1906   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
1907
1908   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
1909   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix has been computed", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
1910   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
1911   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
1912   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
1913   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1914   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
1915   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */
1916   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
1917   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
1918   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
1919   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
1920
1921   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
1922   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
1923   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
1924   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
1925   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
1926   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
1927
1928   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1929
1930   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
1931   if (ninfo) {
1932     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
1933     PetscCall(PetscMalloc1(ninfo, &mumps->info));
1934     mumps->ninfo = ninfo;
1935     for (i = 0; i < ninfo; i++) {
1936       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", ninfo);
1937       mumps->info[i] = info[i];
1938     }
1939   }
1940   PetscOptionsEnd();
1941   PetscFunctionReturn(PETSC_SUCCESS);
1942 }
1943
1944 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
1945 {
1946   PetscFunctionBegin;
1947   if (mumps->id.INFOG(1) < 0) {
1948     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
1949     if (mumps->id.INFOG(1) == -6) {
1950       PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1951       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1952     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1953       PetscCall(PetscInfo(F, "problem with workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1954       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1955     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
1956       PetscCall(PetscInfo(F, "Empty matrix\n"));
1957     } else {
1958       PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1959       F->factorerrortype = MAT_FACTOR_OTHER;
1960     }
1961   }
1962   PetscFunctionReturn(PETSC_SUCCESS);
1963 }
1964
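/*
   The option handling above feeds the symbolic/numeric/solve routines that follow. As a hedged sketch (not
   part of this implementation), this is roughly how an application reaches MatLUFactorSymbolic_AIJMUMPS()
   through the public API; A, b, x, rperm and cperm are caller-supplied placeholders:

     Mat           A, F;
     Vec           b, x;
     IS            rperm, cperm;
     MatFactorInfo finfo;

     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatMumpsSetIcntl(F, 7, 5));                            (request Metis ordering; cached until MUMPS is initialized)
     PetscCall(MatGetOrdering(A, MATORDERINGNATURAL, &rperm, &cperm));
     PetscCall(MatFactorInfoInitialize(&finfo));
     PetscCall(MatLUFactorSymbolic(F, A, rperm, cperm, &finfo));
     PetscCall(MatLUFactorNumeric(F, A, &finfo));
     PetscCall(MatSolve(F, b, x));

   The same controls are also reachable from the options database, e.g. -pc_factor_mat_solver_type mumps
   -mat_mumps_icntl_7 5 when the factorization is driven by KSP/PC.
*/
1965 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS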
r, IS c, const MatFactorInfo *info) 1966 { 1967 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1968 Vec b; 1969 const PetscInt M = A->rmap->N; 1970 1971 PetscFunctionBegin; 1972 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1973 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1974 PetscFunctionReturn(PETSC_SUCCESS); 1975 } 1976 1977 /* Set MUMPS options from the options database */ 1978 PetscCall(MatSetFromOptions_MUMPS(F, A)); 1979 1980 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 1981 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 1982 1983 /* analysis phase */ 1984 mumps->id.job = JOB_FACTSYMBOLIC; 1985 mumps->id.n = M; 1986 switch (mumps->id.ICNTL(18)) { 1987 case 0: /* centralized assembled matrix input */ 1988 if (!mumps->myid) { 1989 mumps->id.nnz = mumps->nnz; 1990 mumps->id.irn = mumps->irn; 1991 mumps->id.jcn = mumps->jcn; 1992 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 1993 if (r) { 1994 mumps->id.ICNTL(7) = 1; 1995 if (!mumps->myid) { 1996 const PetscInt *idx; 1997 PetscInt i; 1998 1999 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2000 PetscCall(ISGetIndices(r, &idx)); 2001 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2002 PetscCall(ISRestoreIndices(r, &idx)); 2003 } 2004 } 2005 } 2006 break; 2007 case 3: /* distributed assembled matrix input (size>1) */ 2008 mumps->id.nnz_loc = mumps->nnz; 2009 mumps->id.irn_loc = mumps->irn; 2010 mumps->id.jcn_loc = mumps->jcn; 2011 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2012 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2013 PetscCall(MatCreateVecs(A, NULL, &b)); 2014 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2015 PetscCall(VecDestroy(&b)); 2016 } 2017 break; 2018 } 2019 PetscMUMPS_c(mumps); 2020 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2021 2022 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2023 F->ops->solve = MatSolve_MUMPS; 2024 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2025 F->ops->matsolve = MatMatSolve_MUMPS; 2026 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2027 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2028 2029 mumps->matstruc = SAME_NONZERO_PATTERN; 2030 PetscFunctionReturn(PETSC_SUCCESS); 2031 } 2032 2033 /* Note the Petsc r and c permutations are ignored */ 2034 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2035 { 2036 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2037 Vec b; 2038 const PetscInt M = A->rmap->N; 2039 2040 PetscFunctionBegin; 2041 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2042 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 /* Set MUMPS options from the options database */ 2047 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2048 2049 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2050 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2051 2052 /* analysis phase */ 2053 mumps->id.job = JOB_FACTSYMBOLIC; 2054 mumps->id.n = M; 2055 switch (mumps->id.ICNTL(18)) { 2056 case 0: /* centralized assembled matrix input */ 2057 if (!mumps->myid) { 2058 mumps->id.nnz = mumps->nnz; 2059 mumps->id.irn = mumps->irn; 2060 mumps->id.jcn = mumps->jcn; 2061 if (mumps->id.ICNTL(6) > 1) mumps->id.a 
= (MumpsScalar *)mumps->val; 2062 } 2063 break; 2064 case 3: /* distributed assembled matrix input (size>1) */ 2065 mumps->id.nnz_loc = mumps->nnz; 2066 mumps->id.irn_loc = mumps->irn; 2067 mumps->id.jcn_loc = mumps->jcn; 2068 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2069 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2070 PetscCall(MatCreateVecs(A, NULL, &b)); 2071 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2072 PetscCall(VecDestroy(&b)); 2073 } 2074 break; 2075 } 2076 PetscMUMPS_c(mumps); 2077 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2078 2079 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2080 F->ops->solve = MatSolve_MUMPS; 2081 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2082 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2083 2084 mumps->matstruc = SAME_NONZERO_PATTERN; 2085 PetscFunctionReturn(PETSC_SUCCESS); 2086 } 2087 2088 /* Note the Petsc r permutation and factor info are ignored */ 2089 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2090 { 2091 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2092 Vec b; 2093 const PetscInt M = A->rmap->N; 2094 2095 PetscFunctionBegin; 2096 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2097 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2098 PetscFunctionReturn(PETSC_SUCCESS); 2099 } 2100 2101 /* Set MUMPS options from the options database */ 2102 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2103 2104 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2105 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2106 2107 /* analysis phase */ 2108 mumps->id.job = JOB_FACTSYMBOLIC; 2109 mumps->id.n = M; 2110 switch (mumps->id.ICNTL(18)) { 2111 case 0: /* centralized assembled matrix input */ 2112 if (!mumps->myid) { 2113 mumps->id.nnz = mumps->nnz; 2114 mumps->id.irn = mumps->irn; 2115 mumps->id.jcn = mumps->jcn; 2116 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2117 } 2118 break; 2119 case 3: /* distributed assembled matrix input (size>1) */ 2120 mumps->id.nnz_loc = mumps->nnz; 2121 mumps->id.irn_loc = mumps->irn; 2122 mumps->id.jcn_loc = mumps->jcn; 2123 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2124 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2125 PetscCall(MatCreateVecs(A, NULL, &b)); 2126 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2127 PetscCall(VecDestroy(&b)); 2128 } 2129 break; 2130 } 2131 PetscMUMPS_c(mumps); 2132 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2133 2134 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2135 F->ops->solve = MatSolve_MUMPS; 2136 F->ops->solvetranspose = MatSolve_MUMPS; 2137 F->ops->matsolve = MatMatSolve_MUMPS; 2138 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2139 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2140 #if defined(PETSC_USE_COMPLEX) 2141 F->ops->getinertia = NULL; 2142 #else 2143 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2144 #endif 2145 2146 mumps->matstruc = SAME_NONZERO_PATTERN; 2147 PetscFunctionReturn(PETSC_SUCCESS); 2148 } 2149 2150 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2151 { 2152 PetscBool iascii; 2153 PetscViewerFormat format; 2154 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2155 2156 PetscFunctionBegin; 2157 /* check if matrix is mumps type */ 2158 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2159 2160 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2161 if (iascii) { 2162 PetscCall(PetscViewerGetFormat(viewer, &format)); 2163 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2164 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2165 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2166 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2167 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2168 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2169 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2170 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2171 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2172 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2173 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2174 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2175 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2176 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2177 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2178 if (mumps->id.ICNTL(11) > 0) { 2179 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2180 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2181 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2182 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2183 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2184 PetscCall(PetscViewerASCIIPrintf(viewer, " 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2185 } 2186 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2187 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2188 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2189 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2190 /* ICNTL(15-17) not used */ 2191 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2192 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2193 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2194 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2195 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2196 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2197 2198 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2199 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2200 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2201 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2202 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2203 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2204 2205 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2206 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2207 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2208 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2209 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2210 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2211 2212 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2213 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2214 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2215 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2216 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2217 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2218 2219 /* information local to 
each processor */ 2220 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2221 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2222 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2223 PetscCall(PetscViewerFlush(viewer)); 2224 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2225 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2226 PetscCall(PetscViewerFlush(viewer)); 2227 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2228 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2229 PetscCall(PetscViewerFlush(viewer)); 2230 2231 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2232 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2233 PetscCall(PetscViewerFlush(viewer)); 2234 2235 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2236 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2237 PetscCall(PetscViewerFlush(viewer)); 2238 2239 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2240 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2241 PetscCall(PetscViewerFlush(viewer)); 2242 2243 if (mumps->ninfo && mumps->ninfo <= 80) { 2244 PetscInt i; 2245 for (i = 0; i < mumps->ninfo; i++) { 2246 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2247 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2248 PetscCall(PetscViewerFlush(viewer)); 2249 } 2250 } 2251 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2252 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2253 2254 if (mumps->myid == 0) { /* information from the host */ 2255 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2256 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2257 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2258 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2259 2260 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2261 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2262 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2263 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2264 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2265 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2266 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2267 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2268 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2269 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2270 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2271 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2272 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2273 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2274 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2275 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2276 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2277 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 
2278 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2279 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2280 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2281 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2282 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2283 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2284 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2285 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2286 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2287 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2288 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2289 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2290 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2291 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2292 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2293 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2294 } 2295 } 2296 } 2297 PetscFunctionReturn(PETSC_SUCCESS); 2298 } 2299 2300 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2301 { 2302 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2303 2304 PetscFunctionBegin; 2305 info->block_size = 1.0; 2306 info->nz_allocated = mumps->id.INFOG(20); 2307 info->nz_used = mumps->id.INFOG(20); 2308 info->nz_unneeded = 0.0; 2309 info->assemblies = 0.0; 2310 info->mallocs = 0.0; 2311 info->memory = 0.0; 2312 info->fill_ratio_given = 0; 2313 info->fill_ratio_needed = 0; 2314 info->factor_mallocs = 0; 2315 PetscFunctionReturn(PETSC_SUCCESS); 2316 } 2317 2318 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2319 { 2320 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2321 const PetscScalar *arr; 2322 const PetscInt *idxs; 2323 
PetscInt size, i; 2324 2325 PetscFunctionBegin; 2326 PetscCall(ISGetLocalSize(is, &size)); 2327 /* Schur complement matrix */ 2328 PetscCall(MatDestroy(&F->schur)); 2329 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2330 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2331 mumps->id.schur = (MumpsScalar *)arr; 2332 mumps->id.size_schur = size; 2333 mumps->id.schur_lld = size; 2334 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2335 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2336 2337 /* MUMPS expects Fortran style indices */ 2338 PetscCall(PetscFree(mumps->id.listvar_schur)); 2339 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2340 PetscCall(ISGetIndices(is, &idxs)); 2341 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2342 PetscCall(ISRestoreIndices(is, &idxs)); 2343 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2344 mumps->id.ICNTL(26) = -1; 2345 PetscFunctionReturn(PETSC_SUCCESS); 2346 } 2347 2348 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2349 { 2350 Mat St; 2351 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2352 PetscScalar *array; 2353 #if defined(PETSC_USE_COMPLEX) 2354 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2355 #endif 2356 2357 PetscFunctionBegin; 2358 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2359 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2360 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2361 PetscCall(MatSetType(St, MATDENSE)); 2362 PetscCall(MatSetUp(St)); 2363 PetscCall(MatDenseGetArray(St, &array)); 2364 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2365 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2366 PetscInt i, j, N = mumps->id.size_schur; 2367 for (i = 0; i < N; i++) { 2368 for (j = 0; j < N; j++) { 2369 #if !defined(PETSC_USE_COMPLEX) 2370 PetscScalar val = mumps->id.schur[i * N + j]; 2371 #else 2372 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2373 #endif 2374 array[j * N + i] = val; 2375 } 2376 } 2377 } else { /* stored by columns */ 2378 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2379 } 2380 } else { /* either full or lower-triangular (not packed) */ 2381 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2382 PetscInt i, j, N = mumps->id.size_schur; 2383 for (i = 0; i < N; i++) { 2384 for (j = i; j < N; j++) { 2385 #if !defined(PETSC_USE_COMPLEX) 2386 PetscScalar val = mumps->id.schur[i * N + j]; 2387 #else 2388 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2389 #endif 2390 array[i * N + j] = val; 2391 array[j * N + i] = val; 2392 } 2393 } 2394 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2395 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2396 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2397 PetscInt i, j, N = mumps->id.size_schur; 2398 for (i = 0; i < N; i++) { 2399 for (j = 0; j < i + 1; j++) { 2400 #if !defined(PETSC_USE_COMPLEX) 2401 PetscScalar val = mumps->id.schur[i * N + j]; 2402 #else 2403 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2404 #endif 2405 array[i * N + j] = val; 
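          /* mirror into the symmetric position so the returned dense matrix is full (diagonal entries are simply written twice) */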
2406 array[j * N + i] = val; 2407 } 2408 } 2409 } 2410 } 2411 PetscCall(MatDenseRestoreArray(St, &array)); 2412 *S = St; 2413 PetscFunctionReturn(PETSC_SUCCESS); 2414 } 2415 2416 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2417 { 2418 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2419 2420 PetscFunctionBegin; 2421 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2422 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2423 for (i = 0; i < nICNTL_pre; ++i) 2424 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2425 if (i == nICNTL_pre) { /* not already cached */ 2426 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2427 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2428 mumps->ICNTL_pre[0]++; 2429 } 2430 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2431 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2432 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2433 PetscFunctionReturn(PETSC_SUCCESS); 2434 } 2435 2436 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2437 { 2438 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2439 2440 PetscFunctionBegin; 2441 *ival = mumps->id.ICNTL(icntl); 2442 PetscFunctionReturn(PETSC_SUCCESS); 2443 } 2444 2445 /*@ 2446 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2447 2448 Logically Collective 2449 2450 Input Parameters: 2451 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2452 . icntl - index of MUMPS parameter array ICNTL() 2453 - ival - value of MUMPS ICNTL(icntl) 2454 2455 Options Database Key: 2456 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2457 2458 Level: beginner 2459 2460 References: 2461 . * - MUMPS Users' Guide 2462 2463 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2464 @*/ 2465 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2466 { 2467 PetscFunctionBegin; 2468 PetscValidType(F, 1); 2469 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2470 PetscValidLogicalCollectiveInt(F, icntl, 2); 2471 PetscValidLogicalCollectiveInt(F, ival, 3); 2472 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2473 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2474 PetscFunctionReturn(PETSC_SUCCESS); 2475 } 2476 2477 /*@ 2478 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2479 2480 Logically Collective 2481 2482 Input Parameters: 2483 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2484 - icntl - index of MUMPS parameter array ICNTL() 2485 2486 Output Parameter: 2487 . ival - value of MUMPS ICNTL(icntl) 2488 2489 Level: beginner 2490 2491 References: 2492 . 
* - MUMPS Users' Guide 2493 2494 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2495 @*/ 2496 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2497 { 2498 PetscFunctionBegin; 2499 PetscValidType(F, 1); 2500 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2501 PetscValidLogicalCollectiveInt(F, icntl, 2); 2502 PetscValidIntPointer(ival, 3); 2503 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2504 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2505 PetscFunctionReturn(PETSC_SUCCESS); 2506 } 2507 2508 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2509 { 2510 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2511 2512 PetscFunctionBegin; 2513 if (mumps->id.job == JOB_NULL) { 2514 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2515 for (i = 0; i < nCNTL_pre; ++i) 2516 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2517 if (i == nCNTL_pre) { 2518 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2519 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2520 mumps->CNTL_pre[0]++; 2521 } 2522 mumps->CNTL_pre[1 + 2 * i] = icntl; 2523 mumps->CNTL_pre[2 + 2 * i] = val; 2524 } else mumps->id.CNTL(icntl) = val; 2525 PetscFunctionReturn(PETSC_SUCCESS); 2526 } 2527 2528 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2529 { 2530 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2531 2532 PetscFunctionBegin; 2533 *val = mumps->id.CNTL(icntl); 2534 PetscFunctionReturn(PETSC_SUCCESS); 2535 } 2536 2537 /*@ 2538 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2539 2540 Logically Collective 2541 2542 Input Parameters: 2543 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2544 . icntl - index of MUMPS parameter array CNTL() 2545 - val - value of MUMPS CNTL(icntl) 2546 2547 Options Database Key: 2548 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2549 2550 Level: beginner 2551 2552 References: 2553 . 
* - MUMPS Users' Guide 2554 2555 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2556 @*/ 2557 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2558 { 2559 PetscFunctionBegin; 2560 PetscValidType(F, 1); 2561 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2562 PetscValidLogicalCollectiveInt(F, icntl, 2); 2563 PetscValidLogicalCollectiveReal(F, val, 3); 2564 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2565 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2566 PetscFunctionReturn(PETSC_SUCCESS); 2567 } 2568 2569 /*@ 2570 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2571 2572 Logically Collective 2573 2574 Input Parameters: 2575 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2576 - icntl - index of MUMPS parameter array CNTL() 2577 2578 Output Parameter: 2579 . val - value of MUMPS CNTL(icntl) 2580 2581 Level: beginner 2582 2583 References: 2584 . * - MUMPS Users' Guide 2585 2586 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2587 @*/ 2588 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2589 { 2590 PetscFunctionBegin; 2591 PetscValidType(F, 1); 2592 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2593 PetscValidLogicalCollectiveInt(F, icntl, 2); 2594 PetscValidRealPointer(val, 3); 2595 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2596 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2597 PetscFunctionReturn(PETSC_SUCCESS); 2598 } 2599 2600 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2601 { 2602 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2603 2604 PetscFunctionBegin; 2605 *info = mumps->id.INFO(icntl); 2606 PetscFunctionReturn(PETSC_SUCCESS); 2607 } 2608 2609 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2610 { 2611 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2612 2613 PetscFunctionBegin; 2614 *infog = mumps->id.INFOG(icntl); 2615 PetscFunctionReturn(PETSC_SUCCESS); 2616 } 2617 2618 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2619 { 2620 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2621 2622 PetscFunctionBegin; 2623 *rinfo = mumps->id.RINFO(icntl); 2624 PetscFunctionReturn(PETSC_SUCCESS); 2625 } 2626 2627 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2628 { 2629 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2630 2631 PetscFunctionBegin; 2632 *rinfog = mumps->id.RINFOG(icntl); 2633 PetscFunctionReturn(PETSC_SUCCESS); 2634 } 2635 2636 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2637 { 2638 Mat Bt = NULL, Btseq = NULL; 2639 PetscBool flg; 2640 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2641 PetscScalar *aa; 2642 PetscInt spnr, *ia, *ja, M, nrhs; 2643 2644 PetscFunctionBegin; 2645 PetscValidPointer(spRHS, 2); 2646 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, 
MATTRANSPOSEVIRTUAL, &flg)); 2647 if (flg) { 2648 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2649 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2650 2651 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2652 2653 if (mumps->petsc_size > 1) { 2654 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2655 Btseq = b->A; 2656 } else { 2657 Btseq = Bt; 2658 } 2659 2660 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2661 mumps->id.nrhs = nrhs; 2662 mumps->id.lrhs = M; 2663 mumps->id.rhs = NULL; 2664 2665 if (!mumps->myid) { 2666 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2667 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2668 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2669 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2670 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2671 } else { 2672 mumps->id.irhs_ptr = NULL; 2673 mumps->id.irhs_sparse = NULL; 2674 mumps->id.nz_rhs = 0; 2675 mumps->id.rhs_sparse = NULL; 2676 } 2677 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2678 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2679 2680 /* solve phase */ 2681 mumps->id.job = JOB_SOLVE; 2682 PetscMUMPS_c(mumps); 2683 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 2684 2685 if (!mumps->myid) { 2686 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2687 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2688 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2689 } 2690 PetscFunctionReturn(PETSC_SUCCESS); 2691 } 2692 2693 /*@ 2694 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2695 2696 Logically Collective 2697 2698 Input Parameter: 2699 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2700 2701 Output Parameter: 2702 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2703 2704 Level: beginner 2705 2706 References: 2707 . * - MUMPS Users' Guide 2708 2709 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2710 @*/ 2711 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2712 { 2713 PetscFunctionBegin; 2714 PetscValidType(F, 1); 2715 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2716 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2717 PetscFunctionReturn(PETSC_SUCCESS); 2718 } 2719 2720 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2721 { 2722 Mat spRHS; 2723 2724 PetscFunctionBegin; 2725 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 2726 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 2727 PetscCall(MatDestroy(&spRHS)); 2728 PetscFunctionReturn(PETSC_SUCCESS); 2729 } 2730 2731 /*@ 2732 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 2733 2734 Logically Collective 2735 2736 Input Parameter: 2737 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2738 2739 Output Parameter: 2740 . 
spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 2741 2742 Level: beginner 2743 2744 References: 2745 . * - MUMPS Users' Guide 2746 2747 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2748 @*/ 2749 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 2750 { 2751 PetscBool flg; 2752 2753 PetscFunctionBegin; 2754 PetscValidType(F, 1); 2755 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2756 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 2757 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 2758 2759 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 2760 PetscFunctionReturn(PETSC_SUCCESS); 2761 } 2762 2763 /*@ 2764 MatMumpsGetInfo - Get MUMPS parameter INFO() 2765 2766 Logically Collective 2767 2768 Input Parameters: 2769 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2770 - icntl - index of MUMPS parameter array INFO() 2771 2772 Output Parameter: 2773 . ival - value of MUMPS INFO(icntl) 2774 2775 Level: beginner 2776 2777 References: 2778 . * - MUMPS Users' Guide 2779 2780 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2781 @*/ 2782 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 2783 { 2784 PetscFunctionBegin; 2785 PetscValidType(F, 1); 2786 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2787 PetscValidIntPointer(ival, 3); 2788 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2789 PetscFunctionReturn(PETSC_SUCCESS); 2790 } 2791 2792 /*@ 2793 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2794 2795 Logically Collective 2796 2797 Input Parameters: 2798 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2799 - icntl - index of MUMPS parameter array INFOG() 2800 2801 Output Parameter: 2802 . ival - value of MUMPS INFOG(icntl) 2803 2804 Level: beginner 2805 2806 References: 2807 . * - MUMPS Users' Guide 2808 2809 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2810 @*/ 2811 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 2812 { 2813 PetscFunctionBegin; 2814 PetscValidType(F, 1); 2815 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2816 PetscValidIntPointer(ival, 3); 2817 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2818 PetscFunctionReturn(PETSC_SUCCESS); 2819 } 2820 2821 /*@ 2822 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2823 2824 Logically Collective 2825 2826 Input Parameters: 2827 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2828 - icntl - index of MUMPS parameter array RINFO() 2829 2830 Output Parameter: 2831 . val - value of MUMPS RINFO(icntl) 2832 2833 Level: beginner 2834 2835 References: 2836 . 
* - MUMPS Users' Guide 2837 2838 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 2839 @*/ 2840 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 2841 { 2842 PetscFunctionBegin; 2843 PetscValidType(F, 1); 2844 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2845 PetscValidRealPointer(val, 3); 2846 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2847 PetscFunctionReturn(PETSC_SUCCESS); 2848 } 2849 2850 /*@ 2851 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2852 2853 Logically Collective 2854 2855 Input Parameters: 2856 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2857 - icntl - index of MUMPS parameter array RINFOG() 2858 2859 Output Parameter: 2860 . val - value of MUMPS RINFOG(icntl) 2861 2862 Level: beginner 2863 2864 References: 2865 . * - MUMPS Users' Guide 2866 2867 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 2868 @*/ 2869 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 2870 { 2871 PetscFunctionBegin; 2872 PetscValidType(F, 1); 2873 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2874 PetscValidRealPointer(val, 3); 2875 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2876 PetscFunctionReturn(PETSC_SUCCESS); 2877 } 2878 2879 /*MC 2880 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2881 distributed and sequential matrices via the external package MUMPS. 2882 2883 Works with `MATAIJ` and `MATSBAIJ` matrices 2884 2885 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2886 2887 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 2888 See details below. 2889 2890 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 2891 2892 Options Database Keys: 2893 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2894 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2895 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2896 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2897 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2898 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2899 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2900 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2901 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2902 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2903 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2904 . 
-mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2905 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2906 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 2907 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2908 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 2909 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2910 . -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor 2911 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2912 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2913 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 2914 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 2915 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2916 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2917 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2918 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2919 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2920 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2921 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2922 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2923 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2924 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2925 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2926 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2927 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2928 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2929 The default is the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual. 2930 2931 Level: beginner 2932 2933 Notes: 2934 MUMPS Cholesky does not handle (complex) Hermitian matrices (see the Users' Guide at https://mumps-solver.org/index.php?page=doc), so using it on a Hermitian matrix will 2935 produce an error. 2936 2937 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 2938 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 2939 2940 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 2941 the failure with 2942 .vb 2943 KSPGetPC(ksp,&pc); 2944 PCFactorGetMatrix(pc,&mat); 2945 MatMumpsGetInfo(mat,....); 2946 MatMumpsGetInfog(mat,....); etc. 2947 .ve 2948 Or run with `-ksp_error_if_not_converged`, in which case the program stops and the MUMPS information is printed in the error message.
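   The same parameters can also be set programmatically on a factor obtained directly with `MatGetFactor()`, outside of a `KSP`/`PC`. A minimal sketch (assuming the application supplies an assembled `MATAIJ` matrix A and vectors b and x; the ICNTL value used here is only illustrative, and error checking and cleanup are omitted) is
.vb
  Mat           F;
  MatFactorInfo info;
  IS            rowperm, colperm;
  PetscInt      ival;

  MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
  MatMumpsSetIcntl(F, 14, 30);            /* e.g. increase the estimated working space by 30 percent */
  MatFactorInfoInitialize(&info);
  MatGetOrdering(A, MATORDERINGNATURAL, &rowperm, &colperm);
  MatLUFactorSymbolic(F, A, rowperm, colperm, &info);
  MatLUFactorNumeric(F, A, &info);
  MatSolve(F, b, x);
  MatMumpsGetInfog(F, 1, &ival);          /* global MUMPS status, negative values indicate an error */
.ve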
2949 2950 MUMPS provides 64-bit integer support in two build modes: 2951 full 64-bit: here MUMPS is built with the C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 2952 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 2953 2954 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 2955 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 2956 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 2957 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 2958 2959 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 2960 2961 There are two modes to run MUMPS/PETSc with OpenMP: 2962 .vb 2963 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2964 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2965 .ve 2966 2967 .vb 2968 Use -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2969 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2970 .ve 2971 2972 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2973 (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 2974 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2975 libraries, to get good performance you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2976 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 2977 2978 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2979 processes on each compute node. Listing the processes in rank ascending order, we split the processes on a node into consecutive groups of 2980 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm 2981 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (which might have been set 2982 by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on those CPUs. 2983 On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets, 2984 you interleave MPI ranks on the two sockets (in other words, even ranks are placed on socket 0 and odd ranks on socket 1), and you bind 2985 MPI ranks to cores; then with -mat_mumps_use_omp_threads 16, a master rank (and the threads it spawns) will use half of its cores in socket 0 and half of its 2986 cores in socket 1, which definitely hurts locality.
On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2987 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2988 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2989 examine the mapping result. 2990 2991 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 2992 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 2993 calls `omp_set_num_threads`(m) internally before calling MUMPS. 2994 2995 References: 2996 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2997 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2998 2999 .seealso: [](chapter_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3000 M*/ 3001 3002 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3003 { 3004 PetscFunctionBegin; 3005 *type = MATSOLVERMUMPS; 3006 PetscFunctionReturn(PETSC_SUCCESS); 3007 } 3008 3009 /* MatGetFactor for Seq and MPI AIJ matrices */ 3010 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3011 { 3012 Mat B; 3013 Mat_MUMPS *mumps; 3014 PetscBool isSeqAIJ; 3015 PetscMPIInt size; 3016 3017 PetscFunctionBegin; 3018 #if defined(PETSC_USE_COMPLEX) 3019 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3020 #endif 3021 /* Create the factorization matrix */ 3022 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3023 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3024 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3025 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3026 PetscCall(MatSetUp(B)); 3027 3028 PetscCall(PetscNew(&mumps)); 3029 3030 B->ops->view = MatView_MUMPS; 3031 B->ops->getinfo = MatGetInfo_MUMPS; 3032 3033 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3034 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3035 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3036 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3037 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3038 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3039 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3040 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3041 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3042 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3043 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3044 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3045 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3046 3047 if (ftype == MAT_FACTOR_LU) { 3048 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3049 B->factortype = MAT_FACTOR_LU; 3050 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3051 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3052 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3053 mumps->sym = 0; 3054 } else { 3055 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3056 B->factortype = MAT_FACTOR_CHOLESKY; 3057 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3058 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3059 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3060 #if defined(PETSC_USE_COMPLEX) 3061 mumps->sym = 2; 3062 #else 3063 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3064 else mumps->sym = 2; 3065 #endif 3066 } 3067 3068 /* set solvertype */ 3069 PetscCall(PetscFree(B->solvertype)); 3070 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3071 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3072 if (size == 1) { 3073 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3074 B->canuseordering = PETSC_TRUE; 3075 } 3076 B->ops->destroy = MatDestroy_MUMPS; 3077 B->data = (void *)mumps; 3078 3079 *F = B; 3080 mumps->id.job = JOB_NULL; 3081 mumps->ICNTL_pre = NULL; 3082 mumps->CNTL_pre = NULL; 3083 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3084 PetscFunctionReturn(PETSC_SUCCESS); 3085 } 3086 3087 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3088 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3089 { 3090 Mat B; 3091 Mat_MUMPS *mumps; 3092 PetscBool isSeqSBAIJ; 3093 PetscMPIInt size; 3094 3095 PetscFunctionBegin; 3096 #if defined(PETSC_USE_COMPLEX) 3097 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3098 #endif 3099 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3100 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3101 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3102 PetscCall(MatSetUp(B)); 3103 3104 PetscCall(PetscNew(&mumps)); 3105 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3106 if (isSeqSBAIJ) { 3107 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3108 } else { 3109 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3110 } 3111 3112 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3113 B->ops->view = MatView_MUMPS; 3114 B->ops->getinfo = MatGetInfo_MUMPS; 3115 3116 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 
3117 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3118 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3119 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3120 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3121 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3122 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3123 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3124 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3125 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3126 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3127 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3128 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3129 3130 B->factortype = MAT_FACTOR_CHOLESKY; 3131 #if defined(PETSC_USE_COMPLEX) 3132 mumps->sym = 2; 3133 #else 3134 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3135 else mumps->sym = 2; 3136 #endif 3137 3138 /* set solvertype */ 3139 PetscCall(PetscFree(B->solvertype)); 3140 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3141 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3142 if (size == 1) { 3143 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3144 B->canuseordering = PETSC_TRUE; 3145 } 3146 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3147 B->ops->destroy = MatDestroy_MUMPS; 3148 B->data = (void *)mumps; 3149 3150 *F = B; 3151 mumps->id.job = JOB_NULL; 3152 mumps->ICNTL_pre = NULL; 3153 mumps->CNTL_pre = NULL; 3154 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3155 PetscFunctionReturn(PETSC_SUCCESS); 3156 } 3157 3158 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3159 { 3160 Mat B; 3161 Mat_MUMPS *mumps; 3162 PetscBool isSeqBAIJ; 3163 PetscMPIInt size; 3164 3165 PetscFunctionBegin; 3166 /* Create the factorization matrix */ 3167 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3168 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3169 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3170 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3171 PetscCall(MatSetUp(B)); 3172 3173 PetscCall(PetscNew(&mumps)); 3174 if (ftype == MAT_FACTOR_LU) { 3175 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3176 B->factortype = MAT_FACTOR_LU; 3177 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3178 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3179 mumps->sym = 0; 3180 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3181 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3182 3183 B->ops->view = MatView_MUMPS; 3184 B->ops->getinfo = 
MatGetInfo_MUMPS; 3185 3186 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3187 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3188 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3189 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3190 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3191 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3192 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3193 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3195 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3196 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3197 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3198 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3199 3200 /* set solvertype */ 3201 PetscCall(PetscFree(B->solvertype)); 3202 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3203 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3204 if (size == 1) { 3205 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3206 B->canuseordering = PETSC_TRUE; 3207 } 3208 B->ops->destroy = MatDestroy_MUMPS; 3209 B->data = (void *)mumps; 3210 3211 *F = B; 3212 mumps->id.job = JOB_NULL; 3213 mumps->ICNTL_pre = NULL; 3214 mumps->CNTL_pre = NULL; 3215 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3216 PetscFunctionReturn(PETSC_SUCCESS); 3217 } 3218 3219 /* MatGetFactor for Seq and MPI SELL matrices */ 3220 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3221 { 3222 Mat B; 3223 Mat_MUMPS *mumps; 3224 PetscBool isSeqSELL; 3225 PetscMPIInt size; 3226 3227 PetscFunctionBegin; 3228 /* Create the factorization matrix */ 3229 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3230 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3231 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3232 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3233 PetscCall(MatSetUp(B)); 3234 3235 PetscCall(PetscNew(&mumps)); 3236 3237 B->ops->view = MatView_MUMPS; 3238 B->ops->getinfo = MatGetInfo_MUMPS; 3239 3240 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3241 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3242 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3243 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3244 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3245 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3246 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3247 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3248 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3249 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3250 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3251 3252 if (ftype == MAT_FACTOR_LU) { 3253 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3254 B->factortype = MAT_FACTOR_LU; 3255 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3256 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3257 mumps->sym = 0; 3258 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3259 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3260 3261 /* set solvertype */ 3262 PetscCall(PetscFree(B->solvertype)); 3263 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3264 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3265 if (size == 1) { 3266 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3267 B->canuseordering = PETSC_TRUE; 3268 } 3269 B->ops->destroy = MatDestroy_MUMPS; 3270 B->data = (void *)mumps; 3271 3272 *F = B; 3273 mumps->id.job = JOB_NULL; 3274 mumps->ICNTL_pre = NULL; 3275 mumps->CNTL_pre = NULL; 3276 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3277 PetscFunctionReturn(PETSC_SUCCESS); 3278 } 3279 3280 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3281 { 3282 PetscFunctionBegin; 3283 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3284 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3285 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3286 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3287 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3288 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3289 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3290 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3291 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3292 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3293 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3294 PetscFunctionReturn(PETSC_SUCCESS); 3295 } 3296