/*
   Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) {
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) {
  PetscInt  myval;
  PetscBool myset;
  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

/* If using PETSc OpenMP support, we only call MUMPS on master ranks.
   Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
        MUMPS_c(&mumps->id); \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
      } \
      PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]

typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* stuff used by PETSc/MUMPS OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
  PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master;        /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] holds the column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) {
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices.
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 195 196 PetscFunctionBegin; 197 #if defined(PETSC_USE_64BIT_INDICES) 198 { 199 PetscInt i; 200 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 201 PetscCall(PetscFree(mumps->ia_alloc)); 202 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 203 mumps->cur_ilen = nrow + 1; 204 } 205 if (nnz > mumps->cur_jlen) { 206 PetscCall(PetscFree(mumps->ja_alloc)); 207 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 208 mumps->cur_jlen = nnz; 209 } 210 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 211 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 212 *ia_mumps = mumps->ia_alloc; 213 *ja_mumps = mumps->ja_alloc; 214 } 215 #else 216 *ia_mumps = ia; 217 *ja_mumps = ja; 218 #endif 219 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 220 PetscFunctionReturn(0); 221 } 222 223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) { 224 PetscFunctionBegin; 225 PetscCall(PetscFree(mumps->id.listvar_schur)); 226 PetscCall(PetscFree(mumps->id.redrhs)); 227 PetscCall(PetscFree(mumps->schur_sol)); 228 mumps->id.size_schur = 0; 229 mumps->id.schur_lld = 0; 230 mumps->id.ICNTL(19) = 0; 231 PetscFunctionReturn(0); 232 } 233 234 /* solve with rhs in mumps->id.redrhs and return in the same location */ 235 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) { 236 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 237 Mat S, B, X; 238 MatFactorSchurStatus schurstatus; 239 PetscInt sizesol; 240 241 PetscFunctionBegin; 242 PetscCall(MatFactorFactorizeSchurComplement(F)); 243 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 244 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 245 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 246 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 247 PetscCall(MatBindToCPU(B, S->boundtocpu)); 248 #endif 249 switch (schurstatus) { 250 case MAT_FACTOR_SCHUR_FACTORED: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 251 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 252 PetscCall(MatBindToCPU(X, S->boundtocpu)); 253 #endif 254 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 255 PetscCall(MatMatSolveTranspose(S, B, X)); 256 } else { 257 PetscCall(MatMatSolve(S, B, X)); 258 } 259 break; 260 case MAT_FACTOR_SCHUR_INVERTED: 261 sizesol = mumps->id.nrhs * mumps->id.size_schur; 262 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 263 PetscCall(PetscFree(mumps->schur_sol)); 264 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 265 mumps->schur_sizesol = sizesol; 266 } 267 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 268 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 269 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 270 PetscCall(MatBindToCPU(X, S->boundtocpu)); 271 #endif 272 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 273 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 274 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 275 } else { 276 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 277 } 278 PetscCall(MatProductSetFromOptions(X)); 279 PetscCall(MatProductSymbolic(X)); 280 PetscCall(MatProductNumeric(X)); 281 282 PetscCall(MatCopy(X, B, 
SAME_NONZERO_PATTERN)); 283 break; 284 default: SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 285 } 286 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 287 PetscCall(MatDestroy(&B)); 288 PetscCall(MatDestroy(&X)); 289 PetscFunctionReturn(0); 290 } 291 292 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) { 293 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 294 295 PetscFunctionBegin; 296 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 297 PetscFunctionReturn(0); 298 } 299 if (!expansion) { /* prepare for the condensation step */ 300 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 301 /* allocate MUMPS internal array to store reduced right-hand sides */ 302 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 303 PetscCall(PetscFree(mumps->id.redrhs)); 304 mumps->id.lredrhs = mumps->id.size_schur; 305 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 306 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 307 } 308 mumps->id.ICNTL(26) = 1; /* condensation phase */ 309 } else { /* prepare for the expansion step */ 310 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 311 PetscCall(MatMumpsSolveSchur_Private(F)); 312 mumps->id.ICNTL(26) = 2; /* expansion phase */ 313 PetscMUMPS_c(mumps); 314 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 315 /* restore defaults */ 316 mumps->id.ICNTL(26) = -1; 317 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 318 if (mumps->id.nrhs > 1) { 319 PetscCall(PetscFree(mumps->id.redrhs)); 320 mumps->id.lredrhs = 0; 321 mumps->sizeredrhs = 0; 322 } 323 } 324 PetscFunctionReturn(0); 325 } 326 327 /* 328 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 329 330 input: 331 A - matrix in aij,baij or sbaij format 332 shift - 0: C style output triple; 1: Fortran style output triple. 333 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 334 MAT_REUSE_MATRIX: only the values in v array are updated 335 output: 336 nnz - dim of r, c, and v (number of local nonzero entries of A) 337 r, c, v - row and col index, matrix values (matrix triples) 338 339 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 340 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 341 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
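   As a small illustration (not tied to any particular caller): converting the 2x2 seqaij matrix [2 0; 3 4]
   with shift=1 (Fortran-style indices) gives nnz=3 and the triples r = {1, 2, 2}, c = {1, 1, 2}, v = {2, 3, 4}.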
342 343 */ 344 345 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) { 346 const PetscScalar *av; 347 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 348 PetscInt64 nz, rnz, i, j, k; 349 PetscMUMPSInt *row, *col; 350 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 351 352 PetscFunctionBegin; 353 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 354 mumps->val = (PetscScalar *)av; 355 if (reuse == MAT_INITIAL_MATRIX) { 356 nz = aa->nz; 357 ai = aa->i; 358 aj = aa->j; 359 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 360 for (i = k = 0; i < M; i++) { 361 rnz = ai[i + 1] - ai[i]; 362 ajj = aj + ai[i]; 363 for (j = 0; j < rnz; j++) { 364 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 365 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 366 k++; 367 } 368 } 369 mumps->irn = row; 370 mumps->jcn = col; 371 mumps->nnz = nz; 372 } 373 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 374 PetscFunctionReturn(0); 375 } 376 377 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) { 378 PetscInt64 nz, i, j, k, r; 379 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 380 PetscMUMPSInt *row, *col; 381 382 PetscFunctionBegin; 383 mumps->val = a->val; 384 if (reuse == MAT_INITIAL_MATRIX) { 385 nz = a->sliidx[a->totalslices]; 386 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 387 for (i = k = 0; i < a->totalslices; i++) { 388 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) { PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); } 389 } 390 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 391 mumps->irn = row; 392 mumps->jcn = col; 393 mumps->nnz = nz; 394 } 395 PetscFunctionReturn(0); 396 } 397 398 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) { 399 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 400 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 401 PetscInt64 M, nz, idx = 0, rnz, i, j, k, m; 402 PetscInt bs; 403 PetscMUMPSInt *row, *col; 404 405 PetscFunctionBegin; 406 PetscCall(MatGetBlockSize(A, &bs)); 407 M = A->rmap->N / bs; 408 mumps->val = aa->a; 409 if (reuse == MAT_INITIAL_MATRIX) { 410 ai = aa->i; 411 aj = aa->j; 412 nz = bs2 * aa->nz; 413 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 414 for (i = 0; i < M; i++) { 415 ajj = aj + ai[i]; 416 rnz = ai[i + 1] - ai[i]; 417 for (k = 0; k < rnz; k++) { 418 for (j = 0; j < bs; j++) { 419 for (m = 0; m < bs; m++) { 420 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 421 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 422 idx++; 423 } 424 } 425 } 426 } 427 mumps->irn = row; 428 mumps->jcn = col; 429 mumps->nnz = nz; 430 } 431 PetscFunctionReturn(0); 432 } 433 434 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) { 435 const PetscInt *ai, *aj, *ajj; 436 PetscInt bs; 437 PetscInt64 nz, rnz, i, j, k, m; 438 PetscMUMPSInt *row, *col; 439 PetscScalar *val; 440 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 441 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 442 #if defined(PETSC_USE_COMPLEX) 443 PetscBool isset, hermitian; 444 #endif 445 446 PetscFunctionBegin; 447 #if defined(PETSC_USE_COMPLEX) 448 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 449 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 450 #endif 451 ai = aa->i; 452 aj = aa->j; 453 
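  /* Sketch of what follows: for bs > 1 the upper triangle is expanded entry-by-entry into scalar (i,j,v) triples
     (all entries of the off-diagonal upper blocks plus the upper triangle of each diagonal block), with the values
     buffered in val_alloc; for bs == 1 the SBAIJ value array aa->a is handed to MUMPS directly. */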
PetscCall(MatGetBlockSize(A, &bs)); 454 if (reuse == MAT_INITIAL_MATRIX) { 455 nz = aa->nz; 456 PetscCall(PetscMalloc2(bs2 * nz, &row, bs2 * nz, &col)); 457 if (bs > 1) { 458 PetscCall(PetscMalloc1(bs2 * nz, &mumps->val_alloc)); 459 mumps->val = mumps->val_alloc; 460 } else { 461 mumps->val = aa->a; 462 } 463 mumps->irn = row; 464 mumps->jcn = col; 465 } else { 466 if (bs == 1) mumps->val = aa->a; 467 row = mumps->irn; 468 col = mumps->jcn; 469 } 470 val = mumps->val; 471 472 nz = 0; 473 if (bs > 1) { 474 for (i = 0; i < mbs; i++) { 475 rnz = ai[i + 1] - ai[i]; 476 ajj = aj + ai[i]; 477 for (j = 0; j < rnz; j++) { 478 for (k = 0; k < bs; k++) { 479 for (m = 0; m < bs; m++) { 480 if (ajj[j] > i || k >= m) { 481 if (reuse == MAT_INITIAL_MATRIX) { 482 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 483 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 484 } 485 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 486 } 487 } 488 } 489 } 490 } 491 } else if (reuse == MAT_INITIAL_MATRIX) { 492 for (i = 0; i < mbs; i++) { 493 rnz = ai[i + 1] - ai[i]; 494 ajj = aj + ai[i]; 495 for (j = 0; j < rnz; j++) { 496 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 497 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 498 nz++; 499 } 500 } 501 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 502 } 503 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 504 PetscFunctionReturn(0); 505 } 506 507 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) { 508 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 509 PetscInt64 nz, rnz, i, j; 510 const PetscScalar *av, *v1; 511 PetscScalar *val; 512 PetscMUMPSInt *row, *col; 513 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 514 PetscBool missing; 515 #if defined(PETSC_USE_COMPLEX) 516 PetscBool hermitian, isset; 517 #endif 518 519 PetscFunctionBegin; 520 #if defined(PETSC_USE_COMPLEX) 521 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 522 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 523 #endif 524 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 525 ai = aa->i; 526 aj = aa->j; 527 adiag = aa->diag; 528 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 529 if (reuse == MAT_INITIAL_MATRIX) { 530 /* count nz in the upper triangular part of A */ 531 nz = 0; 532 if (missing) { 533 for (i = 0; i < M; i++) { 534 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 535 for (j = ai[i]; j < ai[i + 1]; j++) { 536 if (aj[j] < i) continue; 537 nz++; 538 } 539 } else { 540 nz += ai[i + 1] - adiag[i]; 541 } 542 } 543 } else { 544 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 545 } 546 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 547 PetscCall(PetscMalloc1(nz, &val)); 548 mumps->nnz = nz; 549 mumps->irn = row; 550 mumps->jcn = col; 551 mumps->val = mumps->val_alloc = val; 552 553 nz = 0; 554 if (missing) { 555 for (i = 0; i < M; i++) { 556 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 557 for (j = ai[i]; j < ai[i + 1]; j++) { 558 if (aj[j] < i) continue; 559 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 560 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 561 val[nz] = av[j]; 562 nz++; 563 } 564 } else { 565 rnz = ai[i + 1] - adiag[i]; 566 ajj = aj + adiag[i]; 567 v1 = av + adiag[i]; 568 for (j = 0; j < rnz; j++) { 569 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 570 
PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 571 val[nz++] = v1[j]; 572 } 573 } 574 } 575 } else { 576 for (i = 0; i < M; i++) { 577 rnz = ai[i + 1] - adiag[i]; 578 ajj = aj + adiag[i]; 579 v1 = av + adiag[i]; 580 for (j = 0; j < rnz; j++) { 581 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 582 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 583 val[nz++] = v1[j]; 584 } 585 } 586 } 587 } else { 588 nz = 0; 589 val = mumps->val; 590 if (missing) { 591 for (i = 0; i < M; i++) { 592 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 593 for (j = ai[i]; j < ai[i + 1]; j++) { 594 if (aj[j] < i) continue; 595 val[nz++] = av[j]; 596 } 597 } else { 598 rnz = ai[i + 1] - adiag[i]; 599 v1 = av + adiag[i]; 600 for (j = 0; j < rnz; j++) { val[nz++] = v1[j]; } 601 } 602 } 603 } else { 604 for (i = 0; i < M; i++) { 605 rnz = ai[i + 1] - adiag[i]; 606 v1 = av + adiag[i]; 607 for (j = 0; j < rnz; j++) { val[nz++] = v1[j]; } 608 } 609 } 610 } 611 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 612 PetscFunctionReturn(0); 613 } 614 615 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) { 616 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 617 PetscInt bs; 618 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 619 PetscMUMPSInt *row, *col; 620 const PetscScalar *av, *bv, *v1, *v2; 621 PetscScalar *val; 622 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 623 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 624 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 625 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 626 #if defined(PETSC_USE_COMPLEX) 627 PetscBool hermitian, isset; 628 #endif 629 630 PetscFunctionBegin; 631 #if defined(PETSC_USE_COMPLEX) 632 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 633 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 634 #endif 635 PetscCall(MatGetBlockSize(A, &bs)); 636 rstart = A->rmap->rstart; 637 ai = aa->i; 638 aj = aa->j; 639 bi = bb->i; 640 bj = bb->j; 641 av = aa->a; 642 bv = bb->a; 643 644 garray = mat->garray; 645 646 if (reuse == MAT_INITIAL_MATRIX) { 647 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 648 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 649 PetscCall(PetscMalloc1(nz, &val)); 650 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 651 mumps->irn = row; 652 mumps->jcn = col; 653 mumps->val = mumps->val_alloc = val; 654 } else { 655 val = mumps->val; 656 } 657 658 jj = 0; 659 irow = rstart; 660 for (i = 0; i < mbs; i++) { 661 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 662 countA = ai[i + 1] - ai[i]; 663 countB = bi[i + 1] - bi[i]; 664 bjj = bj + bi[i]; 665 v1 = av + ai[i] * bs2; 666 v2 = bv + bi[i] * bs2; 667 668 if (bs > 1) { 669 /* A-part */ 670 for (j = 0; j < countA; j++) { 671 for (k = 0; k < bs; k++) { 672 for (m = 0; m < bs; m++) { 673 if (rstart + ajj[j] * bs > irow || k >= m) { 674 if (reuse == MAT_INITIAL_MATRIX) { 675 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 676 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 677 } 678 val[jj++] = v1[j * bs2 + m + k * bs]; 679 } 680 } 681 } 682 } 683 684 /* B-part */ 685 for (j = 0; j < countB; j++) { 686 for (k = 0; k < bs; k++) { 687 for (m = 0; m < bs; m++) { 688 if (reuse == MAT_INITIAL_MATRIX) { 689 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 690 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, 
&col[jj])); 691 } 692 val[jj++] = v2[j * bs2 + m + k * bs]; 693 } 694 } 695 } 696 } else { 697 /* A-part */ 698 for (j = 0; j < countA; j++) { 699 if (reuse == MAT_INITIAL_MATRIX) { 700 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 701 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 702 } 703 val[jj++] = v1[j]; 704 } 705 706 /* B-part */ 707 for (j = 0; j < countB; j++) { 708 if (reuse == MAT_INITIAL_MATRIX) { 709 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 710 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 711 } 712 val[jj++] = v2[j]; 713 } 714 } 715 irow += bs; 716 } 717 mumps->nnz = jj; 718 PetscFunctionReturn(0); 719 } 720 721 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) { 722 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 723 PetscInt64 rstart, nz, i, j, jj, irow, countA, countB; 724 PetscMUMPSInt *row, *col; 725 const PetscScalar *av, *bv, *v1, *v2; 726 PetscScalar *val; 727 Mat Ad, Ao; 728 Mat_SeqAIJ *aa; 729 Mat_SeqAIJ *bb; 730 731 PetscFunctionBegin; 732 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 733 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 734 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 735 736 aa = (Mat_SeqAIJ *)(Ad)->data; 737 bb = (Mat_SeqAIJ *)(Ao)->data; 738 ai = aa->i; 739 aj = aa->j; 740 bi = bb->i; 741 bj = bb->j; 742 743 rstart = A->rmap->rstart; 744 745 if (reuse == MAT_INITIAL_MATRIX) { 746 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 747 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 748 PetscCall(PetscMalloc1(nz, &val)); 749 mumps->nnz = nz; 750 mumps->irn = row; 751 mumps->jcn = col; 752 mumps->val = mumps->val_alloc = val; 753 } else { 754 val = mumps->val; 755 } 756 757 jj = 0; 758 irow = rstart; 759 for (i = 0; i < m; i++) { 760 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 761 countA = ai[i + 1] - ai[i]; 762 countB = bi[i + 1] - bi[i]; 763 bjj = bj + bi[i]; 764 v1 = av + ai[i]; 765 v2 = bv + bi[i]; 766 767 /* A-part */ 768 for (j = 0; j < countA; j++) { 769 if (reuse == MAT_INITIAL_MATRIX) { 770 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 771 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 772 } 773 val[jj++] = v1[j]; 774 } 775 776 /* B-part */ 777 for (j = 0; j < countB; j++) { 778 if (reuse == MAT_INITIAL_MATRIX) { 779 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 780 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 781 } 782 val[jj++] = v2[j]; 783 } 784 irow++; 785 } 786 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 787 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 788 PetscFunctionReturn(0); 789 } 790 791 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) { 792 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 793 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 794 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 795 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 796 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart; 797 const PetscInt bs2 = mat->bs2; 798 PetscInt bs; 799 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 800 PetscMUMPSInt *row, *col; 801 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 802 PetscScalar *val; 803 804 PetscFunctionBegin; 805 PetscCall(MatGetBlockSize(A, &bs)); 806 if (reuse == MAT_INITIAL_MATRIX) { 807 nz = bs2 * (aa->nz + bb->nz); 808 PetscCall(PetscMalloc2(nz, 
&row, nz, &col)); 809 PetscCall(PetscMalloc1(nz, &val)); 810 mumps->nnz = nz; 811 mumps->irn = row; 812 mumps->jcn = col; 813 mumps->val = mumps->val_alloc = val; 814 } else { 815 val = mumps->val; 816 } 817 818 jj = 0; 819 irow = rstart; 820 for (i = 0; i < mbs; i++) { 821 countA = ai[i + 1] - ai[i]; 822 countB = bi[i + 1] - bi[i]; 823 ajj = aj + ai[i]; 824 bjj = bj + bi[i]; 825 v1 = av + bs2 * ai[i]; 826 v2 = bv + bs2 * bi[i]; 827 828 idx = 0; 829 /* A-part */ 830 for (k = 0; k < countA; k++) { 831 for (j = 0; j < bs; j++) { 832 for (n = 0; n < bs; n++) { 833 if (reuse == MAT_INITIAL_MATRIX) { 834 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 835 PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj])); 836 } 837 val[jj++] = v1[idx++]; 838 } 839 } 840 } 841 842 idx = 0; 843 /* B-part */ 844 for (k = 0; k < countB; k++) { 845 for (j = 0; j < bs; j++) { 846 for (n = 0; n < bs; n++) { 847 if (reuse == MAT_INITIAL_MATRIX) { 848 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 849 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 850 } 851 val[jj++] = v2[idx++]; 852 } 853 } 854 } 855 irow += bs; 856 } 857 PetscFunctionReturn(0); 858 } 859 860 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) { 861 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 862 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 863 PetscMUMPSInt *row, *col; 864 const PetscScalar *av, *bv, *v1, *v2; 865 PetscScalar *val; 866 Mat Ad, Ao; 867 Mat_SeqAIJ *aa; 868 Mat_SeqAIJ *bb; 869 #if defined(PETSC_USE_COMPLEX) 870 PetscBool hermitian, isset; 871 #endif 872 873 PetscFunctionBegin; 874 #if defined(PETSC_USE_COMPLEX) 875 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 876 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 877 #endif 878 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 879 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 880 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 881 882 aa = (Mat_SeqAIJ *)(Ad)->data; 883 bb = (Mat_SeqAIJ *)(Ao)->data; 884 ai = aa->i; 885 aj = aa->j; 886 adiag = aa->diag; 887 bi = bb->i; 888 bj = bb->j; 889 890 rstart = A->rmap->rstart; 891 892 if (reuse == MAT_INITIAL_MATRIX) { 893 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 894 nzb = 0; /* num of upper triangular entries in mat->B */ 895 for (i = 0; i < m; i++) { 896 nza += (ai[i + 1] - adiag[i]); 897 countB = bi[i + 1] - bi[i]; 898 bjj = bj + bi[i]; 899 for (j = 0; j < countB; j++) { 900 if (garray[bjj[j]] > rstart) nzb++; 901 } 902 } 903 904 nz = nza + nzb; /* total nz of upper triangular part of mat */ 905 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 906 PetscCall(PetscMalloc1(nz, &val)); 907 mumps->nnz = nz; 908 mumps->irn = row; 909 mumps->jcn = col; 910 mumps->val = mumps->val_alloc = val; 911 } else { 912 val = mumps->val; 913 } 914 915 jj = 0; 916 irow = rstart; 917 for (i = 0; i < m; i++) { 918 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 919 v1 = av + adiag[i]; 920 countA = ai[i + 1] - adiag[i]; 921 countB = bi[i + 1] - bi[i]; 922 bjj = bj + bi[i]; 923 v2 = bv + bi[i]; 924 925 /* A-part */ 926 for (j = 0; j < countA; j++) { 927 if (reuse == MAT_INITIAL_MATRIX) { 928 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 929 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 930 } 931 
val[jj++] = v1[j]; 932 } 933 934 /* B-part */ 935 for (j = 0; j < countB; j++) { 936 if (garray[bjj[j]] > rstart) { 937 if (reuse == MAT_INITIAL_MATRIX) { 938 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 939 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 940 } 941 val[jj++] = v2[j]; 942 } 943 } 944 irow++; 945 } 946 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 947 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 948 PetscFunctionReturn(0); 949 } 950 951 PetscErrorCode MatDestroy_MUMPS(Mat A) { 952 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 953 954 PetscFunctionBegin; 955 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 956 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 957 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 958 PetscCall(VecDestroy(&mumps->b_seq)); 959 PetscCall(VecDestroy(&mumps->x_seq)); 960 PetscCall(PetscFree(mumps->id.perm_in)); 961 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 962 PetscCall(PetscFree(mumps->val_alloc)); 963 PetscCall(PetscFree(mumps->info)); 964 PetscCall(PetscFree(mumps->ICNTL_pre)); 965 PetscCall(PetscFree(mumps->CNTL_pre)); 966 PetscCall(MatMumpsResetSchur_Private(mumps)); 967 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 968 mumps->id.job = JOB_END; 969 PetscMUMPS_c(mumps); 970 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 971 if (mumps->mumps_comm != MPI_COMM_NULL) { 972 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 973 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 974 } 975 } 976 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 977 if (mumps->use_petsc_omp_support) { 978 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 979 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 980 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 981 } 982 #endif 983 PetscCall(PetscFree(mumps->ia_alloc)); 984 PetscCall(PetscFree(mumps->ja_alloc)); 985 PetscCall(PetscFree(mumps->recvcount)); 986 PetscCall(PetscFree(mumps->reqs)); 987 PetscCall(PetscFree(mumps->irhs_loc)); 988 PetscCall(PetscFree(A->data)); 989 990 /* clear composed functions */ 991 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 992 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 993 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 994 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 995 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 996 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 997 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 998 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 999 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1000 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1001 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1002 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1003 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1004 
PetscFunctionReturn(0); 1005 } 1006 1007 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1008 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) { 1009 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1010 const PetscMPIInt ompsize = mumps->omp_comm_size; 1011 PetscInt i, m, M, rstart; 1012 1013 PetscFunctionBegin; 1014 PetscCall(MatGetSize(A, &M, NULL)); 1015 PetscCall(MatGetLocalSize(A, &m, NULL)); 1016 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1017 if (ompsize == 1) { 1018 if (!mumps->irhs_loc) { 1019 mumps->nloc_rhs = m; 1020 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1021 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1022 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1023 } 1024 mumps->id.rhs_loc = (MumpsScalar *)array; 1025 } else { 1026 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1027 const PetscInt *ranges; 1028 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1029 MPI_Group petsc_group, omp_group; 1030 PetscScalar *recvbuf = NULL; 1031 1032 if (mumps->is_omp_master) { 1033 /* Lazily initialize the omp stuff for distributed rhs */ 1034 if (!mumps->irhs_loc) { 1035 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1036 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1037 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1038 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1039 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1040 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1041 1042 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1043 mumps->nloc_rhs = 0; 1044 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1045 for (j = 0; j < ompsize; j++) { 1046 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1047 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1048 } 1049 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1050 for (j = k = 0; j < ompsize; j++) { 1051 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1052 } 1053 1054 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1055 PetscCallMPI(MPI_Group_free(&petsc_group)); 1056 PetscCallMPI(MPI_Group_free(&omp_group)); 1057 } 1058 1059 /* Realloc buffers when current nrhs is bigger than what we have met */ 1060 if (nrhs > mumps->max_nrhs) { 1061 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1062 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1063 mumps->max_nrhs = nrhs; 1064 } 1065 1066 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1067 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1068 mumps->rhs_disps[0] = 0; 1069 for (j = 1; j < ompsize; j++) { 1070 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1071 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1072 } 1073 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. 
Single rhs is common in Ax=b */ 1074 } 1075 1076 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1077 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1078 1079 if (mumps->is_omp_master) { 1080 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1081 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1082 for (j = 0; j < ompsize; j++) { 1083 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1084 dst = dstbase; 1085 for (i = 0; i < nrhs; i++) { 1086 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1087 src += mumps->rhs_nrow[j]; 1088 dst += mumps->nloc_rhs; 1089 } 1090 dstbase += mumps->rhs_nrow[j]; 1091 } 1092 } 1093 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1094 } 1095 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1096 } 1097 mumps->id.nrhs = nrhs; 1098 mumps->id.nloc_rhs = mumps->nloc_rhs; 1099 mumps->id.lrhs_loc = mumps->nloc_rhs; 1100 mumps->id.irhs_loc = mumps->irhs_loc; 1101 PetscFunctionReturn(0); 1102 } 1103 1104 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) { 1105 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1106 const PetscScalar *rarray = NULL; 1107 PetscScalar *array; 1108 IS is_iden, is_petsc; 1109 PetscInt i; 1110 PetscBool second_solve = PETSC_FALSE; 1111 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1112 1113 PetscFunctionBegin; 1114 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1115 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1116 &cite1)); 1117 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1118 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1119 &cite2)); 1120 1121 if (A->factorerrortype) { 1122 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1123 PetscCall(VecSetInf(x)); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 mumps->id.nrhs = 1; 1128 if (mumps->petsc_size > 1) { 1129 if (mumps->ICNTL20 == 10) { 1130 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1131 PetscCall(VecGetArrayRead(b, &rarray)); 1132 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1133 } else { 1134 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1135 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1136 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1137 if (!mumps->myid) { 1138 PetscCall(VecGetArray(mumps->b_seq, &array)); 1139 mumps->id.rhs = (MumpsScalar *)array; 1140 } 1141 } 1142 } else { /* petsc_size == 1 */ 1143 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1144 PetscCall(VecCopy(b, x)); 1145 PetscCall(VecGetArray(x, &array)); 1146 mumps->id.rhs = (MumpsScalar *)array; 1147 } 1148 1149 /* 1150 handle condensation step of Schur complement (if any) 1151 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 
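     (For reference: as in MatMumpsHandleSchur_Private() above, ICNTL(26)=1 selects the condensation step and
     ICNTL(26)=2 the expansion step of a Schur-complement solve.)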
1152 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1153 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1154 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1155 */ 1156 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1157 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1158 second_solve = PETSC_TRUE; 1159 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1160 } 1161 /* solve phase */ 1162 /*-------------*/ 1163 mumps->id.job = JOB_SOLVE; 1164 PetscMUMPS_c(mumps); 1165 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1166 1167 /* handle expansion step of Schur complement (if any) */ 1168 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1169 1170 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1171 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1172 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1173 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1174 } 1175 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1176 PetscInt *isol2_loc = NULL; 1177 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1178 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1179 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1180 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1181 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1182 PetscCall(ISDestroy(&is_iden)); 1183 PetscCall(ISDestroy(&is_petsc)); 1184 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1185 } 1186 1187 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1188 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1189 } 1190 1191 if (mumps->petsc_size > 1) { 1192 if (mumps->ICNTL20 == 10) { 1193 PetscCall(VecRestoreArrayRead(b, &rarray)); 1194 } else if (!mumps->myid) { 1195 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1196 } 1197 } else PetscCall(VecRestoreArray(x, &array)); 1198 1199 PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3))); 1200 PetscFunctionReturn(0); 1201 } 1202 1203 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) { 1204 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1205 1206 PetscFunctionBegin; 1207 mumps->id.ICNTL(9) = 0; 1208 PetscCall(MatSolve_MUMPS(A, b, x)); 1209 mumps->id.ICNTL(9) = 1; 1210 PetscFunctionReturn(0); 1211 } 1212 1213 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) { 1214 Mat Bt = NULL; 1215 PetscBool denseX, denseB, flg, flgT; 1216 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1217 PetscInt i, nrhs, M; 1218 PetscScalar *array; 1219 const PetscScalar *rbray; 1220 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1221 PetscMUMPSInt *isol_loc, *isol_loc_save; 1222 PetscScalar *bray, *sol_loc, *sol_loc_save; 1223 IS is_to, is_from; 1224 PetscInt k, proc, j, m, myrstart; 1225 const PetscInt *rstart; 1226 Vec v_mpi, msol_loc; 1227 VecScatter scat_sol; 1228 
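  /* b_seq and scat_rhs below are only used in the parallel path with a dense, centralized RHS (mumps->ICNTL20 == 0), where B is gathered onto rank 0 */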
Vec b_seq; 1229 VecScatter scat_rhs; 1230 PetscScalar *aa; 1231 PetscInt spnr, *ia, *ja; 1232 Mat_MPIAIJ *b = NULL; 1233 1234 PetscFunctionBegin; 1235 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1236 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1237 1238 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1239 if (denseB) { 1240 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1241 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1242 } else { /* sparse B */ 1243 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1244 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEMAT, &flgT)); 1245 if (flgT) { /* input B is transpose of actural RHS matrix, 1246 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1247 PetscCall(MatTransposeGetMat(B, &Bt)); 1248 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEMAT matrix"); 1249 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1250 } 1251 1252 PetscCall(MatGetSize(B, &M, &nrhs)); 1253 mumps->id.nrhs = nrhs; 1254 mumps->id.lrhs = M; 1255 mumps->id.rhs = NULL; 1256 1257 if (mumps->petsc_size == 1) { 1258 PetscScalar *aa; 1259 PetscInt spnr, *ia, *ja; 1260 PetscBool second_solve = PETSC_FALSE; 1261 1262 PetscCall(MatDenseGetArray(X, &array)); 1263 mumps->id.rhs = (MumpsScalar *)array; 1264 1265 if (denseB) { 1266 /* copy B to X */ 1267 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1268 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1269 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1270 } else { /* sparse B */ 1271 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1272 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1273 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1274 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1275 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1276 } 1277 /* handle condensation step of Schur complement (if any) */ 1278 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1279 second_solve = PETSC_TRUE; 1280 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1281 } 1282 /* solve phase */ 1283 /*-------------*/ 1284 mumps->id.job = JOB_SOLVE; 1285 PetscMUMPS_c(mumps); 1286 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1287 1288 /* handle expansion step of Schur complement (if any) */ 1289 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1290 if (!denseB) { /* sparse B */ 1291 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1292 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1293 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1294 } 1295 PetscCall(MatDenseRestoreArray(X, &array)); 1296 PetscFunctionReturn(0); 1297 } 1298 1299 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! 
--------*/ 1300 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1301 1302 /* create msol_loc to hold mumps local solution */ 1303 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1304 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1305 1306 lsol_loc = mumps->id.lsol_loc; 1307 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1308 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1309 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1310 mumps->id.isol_loc = isol_loc; 1311 1312 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1313 1314 if (denseB) { 1315 if (mumps->ICNTL20 == 10) { 1316 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1317 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1318 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1319 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1320 PetscCall(MatGetLocalSize(B, &m, NULL)); 1321 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1322 } else { 1323 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1324 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1325 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1326 0, re-arrange B into desired order, which is a local operation. 1327 */ 1328 1329 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1330 /* wrap dense rhs matrix B into a vector v_mpi */ 1331 PetscCall(MatGetLocalSize(B, &m, NULL)); 1332 PetscCall(MatDenseGetArray(B, &bray)); 1333 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1334 PetscCall(MatDenseRestoreArray(B, &bray)); 1335 1336 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
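         The index set built just below maps each process's column-slice of B (as laid out in v_mpi) to its place
         in the column-major centralized array wrapped by b_seq on rank 0.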
*/ 1337 if (!mumps->myid) { 1338 PetscInt *idx; 1339 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1340 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1341 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1342 k = 0; 1343 for (proc = 0; proc < mumps->petsc_size; proc++) { 1344 for (j = 0; j < nrhs; j++) { 1345 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1346 } 1347 } 1348 1349 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1350 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1351 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1352 } else { 1353 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1354 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1355 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1356 } 1357 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1358 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1359 PetscCall(ISDestroy(&is_to)); 1360 PetscCall(ISDestroy(&is_from)); 1361 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1362 1363 if (!mumps->myid) { /* define rhs on the host */ 1364 PetscCall(VecGetArray(b_seq, &bray)); 1365 mumps->id.rhs = (MumpsScalar *)bray; 1366 PetscCall(VecRestoreArray(b_seq, &bray)); 1367 } 1368 } 1369 } else { /* sparse B */ 1370 b = (Mat_MPIAIJ *)Bt->data; 1371 1372 /* wrap dense X into a vector v_mpi */ 1373 PetscCall(MatGetLocalSize(X, &m, NULL)); 1374 PetscCall(MatDenseGetArray(X, &bray)); 1375 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1376 PetscCall(MatDenseRestoreArray(X, &bray)); 1377 1378 if (!mumps->myid) { 1379 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1380 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1381 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1382 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1383 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1384 } else { 1385 mumps->id.irhs_ptr = NULL; 1386 mumps->id.irhs_sparse = NULL; 1387 mumps->id.nz_rhs = 0; 1388 mumps->id.rhs_sparse = NULL; 1389 } 1390 } 1391 1392 /* solve phase */ 1393 /*-------------*/ 1394 mumps->id.job = JOB_SOLVE; 1395 PetscMUMPS_c(mumps); 1396 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1397 1398 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1399 PetscCall(MatDenseGetArray(X, &array)); 1400 PetscCall(VecPlaceArray(v_mpi, array)); 1401 1402 /* create scatter scat_sol */ 1403 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1404 /* iidx: index for scatter mumps solution to petsc X */ 1405 1406 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1407 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1408 for (i = 0; i < lsol_loc; i++) { 1409 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1410 1411 for (proc = 0; proc < mumps->petsc_size; proc++) { 1412 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1413 myrstart = rstart[proc]; 1414 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1415 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1416 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1417 break; 1418 } 1419 } 1420 1421 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1422 } 1423 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1424 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1425 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1426 PetscCall(ISDestroy(&is_from)); 1427 PetscCall(ISDestroy(&is_to)); 1428 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1429 PetscCall(MatDenseRestoreArray(X, &array)); 1430 1431 /* free spaces */ 1432 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1433 mumps->id.isol_loc = isol_loc_save; 1434 1435 PetscCall(PetscFree2(sol_loc, isol_loc)); 1436 PetscCall(PetscFree(idxx)); 1437 PetscCall(VecDestroy(&msol_loc)); 1438 PetscCall(VecDestroy(&v_mpi)); 1439 if (!denseB) { 1440 if (!mumps->myid) { 1441 b = (Mat_MPIAIJ *)Bt->data; 1442 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1443 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1444 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1445 } 1446 } else { 1447 if (mumps->ICNTL20 == 0) { 1448 PetscCall(VecDestroy(&b_seq)); 1449 PetscCall(VecScatterDestroy(&scat_rhs)); 1450 } 1451 } 1452 PetscCall(VecScatterDestroy(&scat_sol)); 1453 PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3))); 1454 PetscFunctionReturn(0); 1455 } 1456 1457 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) { 1458 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1459 PetscMUMPSInt oldvalue = mumps->id.ICNTL(9); 1460 1461 PetscFunctionBegin; 1462 mumps->id.ICNTL(9) = 0; 1463 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1464 mumps->id.ICNTL(9) = oldvalue; 1465 PetscFunctionReturn(0); 1466 } 1467 1468 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) { 1469 PetscBool flg; 1470 Mat B; 1471 1472 PetscFunctionBegin; 1473 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1474 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1475 1476 /* Create B=Bt^T that uses Bt's data structure */ 1477 PetscCall(MatCreateTranspose(Bt, &B)); 1478 1479 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1480 PetscCall(MatDestroy(&B)); 1481 PetscFunctionReturn(0); 1482 } 1483 1484 #if !defined(PETSC_USE_COMPLEX) 1485 /* 1486 input: 1487 F: numeric factor 1488 output: 1489 nneg: total number of negative pivots 1490 nzero: total number of zero pivots 1491 npos: (global dimension of F) - nneg - nzero 1492 */ 1493 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) { 1494 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1495 PetscMPIInt size; 1496 1497 PetscFunctionBegin; 1498 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1499 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. 
Set ICNTL(13)=1 to skip ScaLAPACK */
1500 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1501
1502 if (nneg) *nneg = mumps->id.INFOG(12);
1503 if (nzero || npos) {
1504 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1505 if (nzero) *nzero = mumps->id.INFOG(28);
1506 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1507 }
1508 PetscFunctionReturn(0);
1509 }
1510 #endif
1511
1512 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) {
1513 PetscInt i, nreqs;
1514 PetscMUMPSInt *irn, *jcn;
1515 PetscMPIInt count;
1516 PetscInt64 totnnz, remain;
1517 const PetscInt osize = mumps->omp_comm_size;
1518 PetscScalar *val;
1519
1520 PetscFunctionBegin;
1521 if (osize > 1) {
1522 if (reuse == MAT_INITIAL_MATRIX) {
1523 /* master first gathers counts of nonzeros to receive */
1524 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1525 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1526
1527 /* Then each rank computes the number of sends/recvs it needs */
1528 if (mumps->is_omp_master) {
1529 /* Start from 1 since self communication is not done in MPI */
1530 nreqs = 0;
1531 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1532 } else {
1533 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1534 }
1535 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1536
1537 /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1538 MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1539 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
1540 is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
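For example, with nnz = 3*PETSC_MPI_INT_MAX + 5 a sender posts four MPI_Isend calls per array, with counts PETSC_MPI_INT_MAX, PETSC_MPI_INT_MAX, PETSC_MPI_INT_MAX and 5.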
1541 */ 1542 nreqs = 0; /* counter for actual send/recvs */ 1543 if (mumps->is_omp_master) { 1544 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1545 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1546 PetscCall(PetscMalloc1(totnnz, &val)); 1547 1548 /* Self communication */ 1549 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1550 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1551 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1552 1553 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1554 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1555 PetscCall(PetscFree(mumps->val_alloc)); 1556 mumps->nnz = totnnz; 1557 mumps->irn = irn; 1558 mumps->jcn = jcn; 1559 mumps->val = mumps->val_alloc = val; 1560 1561 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1562 jcn += mumps->recvcount[0]; 1563 val += mumps->recvcount[0]; 1564 1565 /* Remote communication */ 1566 for (i = 1; i < osize; i++) { 1567 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1568 remain = mumps->recvcount[i] - count; 1569 while (count > 0) { 1570 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1571 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1572 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1573 irn += count; 1574 jcn += count; 1575 val += count; 1576 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1577 remain -= count; 1578 } 1579 } 1580 } else { 1581 irn = mumps->irn; 1582 jcn = mumps->jcn; 1583 val = mumps->val; 1584 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1585 remain = mumps->nnz - count; 1586 while (count > 0) { 1587 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1588 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1589 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1590 irn += count; 1591 jcn += count; 1592 val += count; 1593 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1594 remain -= count; 1595 } 1596 } 1597 } else { 1598 nreqs = 0; 1599 if (mumps->is_omp_master) { 1600 val = mumps->val + mumps->recvcount[0]; 1601 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1602 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1603 remain = mumps->recvcount[i] - count; 1604 while (count > 0) { 1605 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1606 val += count; 1607 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1608 remain -= count; 1609 } 1610 } 1611 } else { 1612 val = mumps->val; 1613 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1614 remain = mumps->nnz - count; 1615 while (count > 0) { 1616 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1617 val += count; 1618 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1619 remain -= count; 1620 } 1621 } 1622 } 1623 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1624 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1625 } 1626 PetscFunctionReturn(0); 1627 } 1628 1629 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo 
*info) {
1630 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1631 PetscBool isMPIAIJ;
1632
1633 PetscFunctionBegin;
1634 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
1635 if (mumps->id.INFOG(1) == -6) { PetscCall(PetscInfo(A, "MatFactorNumeric is called with a structurally singular matrix, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); }
1636 PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1637 PetscFunctionReturn(0);
1638 }
1639
1640 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1641 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1642
1643 /* numerical factorization phase */
1644 /*-------------------------------*/
1645 mumps->id.job = JOB_FACTNUMERIC;
1646 if (!mumps->id.ICNTL(18)) { /* A is centralized */
1647 if (!mumps->myid) { mumps->id.a = (MumpsScalar *)mumps->val; }
1648 } else {
1649 mumps->id.a_loc = (MumpsScalar *)mumps->val;
1650 }
1651 PetscMUMPS_c(mumps);
1652 if (mumps->id.INFOG(1) < 0) {
1653 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1654 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1655 PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1656 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1657 } else if (mumps->id.INFOG(1) == -13) {
1658 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1659 F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1660 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1661 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1662 F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1663 } else {
1664 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1665 F->factorerrortype = MAT_FACTOR_OTHER;
1666 }
1667 }
1668 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "mumps->id.ICNTL(16)=%d", mumps->id.ICNTL(16));
1669
1670 F->assembled = PETSC_TRUE;
1671
1672 if (F->schur) { /* reset Schur status to unfactored */
1673 #if defined(PETSC_HAVE_CUDA)
1674 F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1675 #endif
1676 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1677 mumps->id.ICNTL(19) = 2;
1678 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1679 }
1680 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1681 }
1682
1683 /* just to be sure that the ICNTL(19) value returned by MatMumpsGetIcntl() is always consistent */
1684 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1685
1686 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1687 if (mumps->petsc_size > 1) {
1688 PetscInt lsol_loc;
1689 PetscScalar *sol_loc;
1690
1691 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1692
1693 /* distributed solution; create x_seq=sol_loc for repeated use */
1694 if (mumps->x_seq) {
1695
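/* a local solution vector from a previous numeric factorization exists; destroy it and its scatter so they can be re-created below with the new local length INFO(23) */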
PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1696 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1697 PetscCall(VecDestroy(&mumps->x_seq)); 1698 } 1699 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1700 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1701 mumps->id.lsol_loc = lsol_loc; 1702 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1703 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1704 } 1705 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1706 PetscFunctionReturn(0); 1707 } 1708 1709 /* Sets MUMPS options from the options database */ 1710 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) { 1711 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1712 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1713 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1714 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1715 MumpsScalar *arr; 1716 1717 PetscFunctionBegin; 1718 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1719 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1720 PetscInt nthreads = 0; 1721 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1722 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1723 1724 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1725 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1726 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1727 1728 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1729 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1730 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1731 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1732 if (mumps->use_petsc_omp_support) { 1733 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 1734 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1735 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1736 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1737 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1738 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1739 #endif 1740 } else { 1741 mumps->omp_comm = PETSC_COMM_SELF; 1742 mumps->mumps_comm = mumps->petsc_comm; 1743 mumps->is_omp_master = PETSC_TRUE; 1744 } 1745 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1746 mumps->reqs = NULL; 1747 mumps->tag = 0; 1748 1749 if (mumps->mumps_comm != MPI_COMM_NULL) { 1750 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1751 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
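In the non-OpenMP branch below, PetscCommGetComm() plays a similar role by handing out an inner communicator that PETSc manages for external packages.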
*/ 1752 MPI_Comm comm; 1753 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 1754 mumps->mumps_comm = comm; 1755 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 1756 } 1757 1758 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1759 mumps->id.job = JOB_INIT; 1760 mumps->id.par = 1; /* host participates factorizaton and solve */ 1761 mumps->id.sym = mumps->sym; 1762 1763 size = mumps->id.size_schur; 1764 arr = mumps->id.schur; 1765 listvar_schur = mumps->id.listvar_schur; 1766 PetscMUMPS_c(mumps); 1767 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1768 /* restore cached ICNTL and CNTL values */ 1769 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 1770 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 1771 PetscCall(PetscFree(mumps->ICNTL_pre)); 1772 PetscCall(PetscFree(mumps->CNTL_pre)); 1773 1774 if (schur) { 1775 mumps->id.size_schur = size; 1776 mumps->id.schur_lld = size; 1777 mumps->id.schur = arr; 1778 mumps->id.listvar_schur = listvar_schur; 1779 if (mumps->petsc_size > 1) { 1780 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 1781 1782 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1783 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1784 PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 1785 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 1786 } else { 1787 if (F->factortype == MAT_FACTOR_LU) { 1788 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1789 } else { 1790 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1791 } 1792 } 1793 mumps->id.ICNTL(26) = -1; 1794 } 1795 1796 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1797 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
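The two broadcasts below replicate all 40 ICNTL and 15 CNTL entries over omp_comm.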
1798 */ 1799 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 1800 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 1801 1802 mumps->scat_rhs = NULL; 1803 mumps->scat_sol = NULL; 1804 1805 /* set PETSc-MUMPS default options - override MUMPS default */ 1806 mumps->id.ICNTL(3) = 0; 1807 mumps->id.ICNTL(4) = 0; 1808 if (mumps->petsc_size == 1) { 1809 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1810 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1811 } else { 1812 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1813 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1814 } 1815 } 1816 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 1817 if (flg) mumps->id.ICNTL(1) = icntl; 1818 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 1819 if (flg) mumps->id.ICNTL(2) = icntl; 1820 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 1821 if (flg) mumps->id.ICNTL(3) = icntl; 1822 1823 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 1824 if (flg) mumps->id.ICNTL(4) = icntl; 1825 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1826 1827 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 1828 if (flg) mumps->id.ICNTL(6) = icntl; 1829 1830 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 1831 if (flg) { 1832 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1833 mumps->id.ICNTL(7) = icntl; 1834 } 1835 1836 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 1837 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 1838 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 1839 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 1840 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 1841 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 1842 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 1843 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 1844 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 1845 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 1846 if (flg) { 1847 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 1848 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 1849 } 1850 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 1851 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1852 PetscCall(MatDestroy(&F->schur)); 1853 PetscCall(MatMumpsResetSchur_Private(mumps)); 1854 } 1855 1856 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1857 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1858 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1859 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1860 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1861 In short, we could not use distributed RHS with MPICH until v4.0b1. 
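Hence the defaults chosen below: ICNTL(20)=0 (centralized dense RHS) for MUMPS older than 5.3.0 or an affected MPICH, and ICNTL(20)=10 (distributed dense RHS) otherwise.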
*/
1863 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1864 mumps->ICNTL20 = 0; /* Centralized dense RHS */
1865 #else
1866 mumps->ICNTL20 = 10; /* Distributed dense RHS */
1867 #endif
1868 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
1869 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
1870 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1871 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1872 #endif
1873 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use the distributed solution vector */
1874
1875 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
1876 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
1877 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
1878 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
1879
1880 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
1881 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix is used", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
1882 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
1883 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
1884 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
1885 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1886 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
1887 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */
1888 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None",
mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
1889 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
1890 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
1891 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
1892
1893 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
1894 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
1895 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
1896 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
1897 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
1898 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
1899
1900 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1901
1902 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
1903 if (ninfo) {
1904 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
1905 PetscCall(PetscMalloc1(ninfo, &mumps->info));
1906 mumps->ninfo = ninfo;
1907 for (i = 0; i < ninfo; i++) {
1908 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
1909 mumps->info[i] = info[i];
1910 }
1911 }
1912 PetscOptionsEnd();
1913 PetscFunctionReturn(0);
1914 }
1915
1916 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) {
1917 PetscFunctionBegin;
1918 if (mumps->id.INFOG(1) < 0) {
1919 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
1920 if (mumps->id.INFOG(1) == -6) {
1921 PetscCall(PetscInfo(F, "matrix is structurally singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1922 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1923 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1924 PetscCall(PetscInfo(F, "problem with workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1925 F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1926 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
1927 PetscCall(PetscInfo(F, "Empty matrix\n"));
1928 } else {
1929 PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1930 F->factorerrortype = MAT_FACTOR_OTHER;
1931 }
1932 }
1933 PetscFunctionReturn(0);
1934 }
1935
1936 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo
*info) { 1937 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1938 Vec b; 1939 const PetscInt M = A->rmap->N; 1940 1941 PetscFunctionBegin; 1942 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1943 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1944 PetscFunctionReturn(0); 1945 } 1946 1947 /* Set MUMPS options from the options database */ 1948 PetscCall(MatSetFromOptions_MUMPS(F, A)); 1949 1950 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 1951 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 1952 1953 /* analysis phase */ 1954 /*----------------*/ 1955 mumps->id.job = JOB_FACTSYMBOLIC; 1956 mumps->id.n = M; 1957 switch (mumps->id.ICNTL(18)) { 1958 case 0: /* centralized assembled matrix input */ 1959 if (!mumps->myid) { 1960 mumps->id.nnz = mumps->nnz; 1961 mumps->id.irn = mumps->irn; 1962 mumps->id.jcn = mumps->jcn; 1963 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 1964 if (r) { 1965 mumps->id.ICNTL(7) = 1; 1966 if (!mumps->myid) { 1967 const PetscInt *idx; 1968 PetscInt i; 1969 1970 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 1971 PetscCall(ISGetIndices(r, &idx)); 1972 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 1973 PetscCall(ISRestoreIndices(r, &idx)); 1974 } 1975 } 1976 } 1977 break; 1978 case 3: /* distributed assembled matrix input (size>1) */ 1979 mumps->id.nnz_loc = mumps->nnz; 1980 mumps->id.irn_loc = mumps->irn; 1981 mumps->id.jcn_loc = mumps->jcn; 1982 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 1983 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1984 PetscCall(MatCreateVecs(A, NULL, &b)); 1985 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 1986 PetscCall(VecDestroy(&b)); 1987 } 1988 break; 1989 } 1990 PetscMUMPS_c(mumps); 1991 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 1992 1993 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1994 F->ops->solve = MatSolve_MUMPS; 1995 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1996 F->ops->matsolve = MatMatSolve_MUMPS; 1997 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1998 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 1999 2000 mumps->matstruc = SAME_NONZERO_PATTERN; 2001 PetscFunctionReturn(0); 2002 } 2003 2004 /* Note the Petsc r and c permutations are ignored */ 2005 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) { 2006 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2007 Vec b; 2008 const PetscInt M = A->rmap->N; 2009 2010 PetscFunctionBegin; 2011 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2012 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2013 PetscFunctionReturn(0); 2014 } 2015 2016 /* Set MUMPS options from the options database */ 2017 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2018 2019 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2020 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2021 2022 /* analysis phase */ 2023 /*----------------*/ 2024 mumps->id.job = JOB_FACTSYMBOLIC; 2025 mumps->id.n = M; 2026 switch (mumps->id.ICNTL(18)) { 2027 case 0: /* centralized assembled matrix input */ 2028 if (!mumps->myid) { 2029 mumps->id.nnz = mumps->nnz; 2030 mumps->id.irn = mumps->irn; 2031 mumps->id.jcn = mumps->jcn; 2032 if (mumps->id.ICNTL(6) > 1) { mumps->id.a = (MumpsScalar 
*)mumps->val; } 2033 } 2034 break; 2035 case 3: /* distributed assembled matrix input (size>1) */ 2036 mumps->id.nnz_loc = mumps->nnz; 2037 mumps->id.irn_loc = mumps->irn; 2038 mumps->id.jcn_loc = mumps->jcn; 2039 if (mumps->id.ICNTL(6) > 1) { mumps->id.a_loc = (MumpsScalar *)mumps->val; } 2040 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2041 PetscCall(MatCreateVecs(A, NULL, &b)); 2042 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2043 PetscCall(VecDestroy(&b)); 2044 } 2045 break; 2046 } 2047 PetscMUMPS_c(mumps); 2048 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2049 2050 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2051 F->ops->solve = MatSolve_MUMPS; 2052 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2053 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2054 2055 mumps->matstruc = SAME_NONZERO_PATTERN; 2056 PetscFunctionReturn(0); 2057 } 2058 2059 /* Note the Petsc r permutation and factor info are ignored */ 2060 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) { 2061 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2062 Vec b; 2063 const PetscInt M = A->rmap->N; 2064 2065 PetscFunctionBegin; 2066 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2067 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2068 PetscFunctionReturn(0); 2069 } 2070 2071 /* Set MUMPS options from the options database */ 2072 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2073 2074 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2075 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2076 2077 /* analysis phase */ 2078 /*----------------*/ 2079 mumps->id.job = JOB_FACTSYMBOLIC; 2080 mumps->id.n = M; 2081 switch (mumps->id.ICNTL(18)) { 2082 case 0: /* centralized assembled matrix input */ 2083 if (!mumps->myid) { 2084 mumps->id.nnz = mumps->nnz; 2085 mumps->id.irn = mumps->irn; 2086 mumps->id.jcn = mumps->jcn; 2087 if (mumps->id.ICNTL(6) > 1) { mumps->id.a = (MumpsScalar *)mumps->val; } 2088 } 2089 break; 2090 case 3: /* distributed assembled matrix input (size>1) */ 2091 mumps->id.nnz_loc = mumps->nnz; 2092 mumps->id.irn_loc = mumps->irn; 2093 mumps->id.jcn_loc = mumps->jcn; 2094 if (mumps->id.ICNTL(6) > 1) { mumps->id.a_loc = (MumpsScalar *)mumps->val; } 2095 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2096 PetscCall(MatCreateVecs(A, NULL, &b)); 2097 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2098 PetscCall(VecDestroy(&b)); 2099 } 2100 break; 2101 } 2102 PetscMUMPS_c(mumps); 2103 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2104 2105 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2106 F->ops->solve = MatSolve_MUMPS; 2107 F->ops->solvetranspose = MatSolve_MUMPS; 2108 F->ops->matsolve = MatMatSolve_MUMPS; 2109 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2110 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2111 #if defined(PETSC_USE_COMPLEX) 2112 F->ops->getinertia = NULL; 2113 #else 2114 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2115 #endif 2116 2117 mumps->matstruc = SAME_NONZERO_PATTERN; 2118 PetscFunctionReturn(0); 2119 } 2120 2121 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) { 2122 PetscBool iascii; 2123 PetscViewerFormat format; 2124 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2125 2126 PetscFunctionBegin; 2127 /* check if matrix is mumps type */ 2128 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2129 2130 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2131 if (iascii) { 2132 PetscCall(PetscViewerGetFormat(viewer, &format)); 2133 if (format == PETSC_VIEWER_ASCII_INFO) { 2134 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2135 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2136 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2137 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2138 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2139 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2140 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2141 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2142 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2143 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2144 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2145 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2146 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2147 if (mumps->id.ICNTL(11) > 0) { 2148 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2149 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2150 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2151 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2152 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g \n", mumps->id.RINFOG(9))); 2153 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2154 } 2155 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2156 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2157 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2158 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2159 /* ICNTL(15-17) not used */ 2160 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2161 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2162 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2163 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2164 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2165 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2166 2167 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2168 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2169 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2170 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2171 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2172 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2173 2174 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2175 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2176 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2177 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2178 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2179 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2180 2181 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g \n", mumps->id.CNTL(1))); 2182 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g \n", mumps->id.CNTL(2))); 2183 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g \n", mumps->id.CNTL(3))); 2184 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g \n", mumps->id.CNTL(4))); 2185 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g \n", mumps->id.CNTL(5))); 2186 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g \n", mumps->id.CNTL(7))); 2187 2188 /* information local to each processor */ 2189 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the 
elimination after analysis): \n")); 2190 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2191 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g \n", mumps->myid, mumps->id.RINFO(1))); 2192 PetscCall(PetscViewerFlush(viewer)); 2193 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n")); 2194 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g \n", mumps->myid, mumps->id.RINFO(2))); 2195 PetscCall(PetscViewerFlush(viewer)); 2196 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n")); 2197 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g \n", mumps->myid, mumps->id.RINFO(3))); 2198 PetscCall(PetscViewerFlush(viewer)); 2199 2200 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n")); 2201 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2202 PetscCall(PetscViewerFlush(viewer)); 2203 2204 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n")); 2205 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2206 PetscCall(PetscViewerFlush(viewer)); 2207 2208 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n")); 2209 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2210 PetscCall(PetscViewerFlush(viewer)); 2211 2212 if (mumps->ninfo && mumps->ninfo <= 80) { 2213 PetscInt i; 2214 for (i = 0; i < mumps->ninfo; i++) { 2215 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "): \n", mumps->info[i])); 2216 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2217 PetscCall(PetscViewerFlush(viewer)); 2218 } 2219 } 2220 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2221 2222 if (!mumps->myid) { /* information from the host */ 2223 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g \n", mumps->id.RINFOG(1))); 2224 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g \n", mumps->id.RINFOG(2))); 2225 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g \n", mumps->id.RINFOG(3))); 2226 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2227 2228 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2229 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2230 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2231 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2232 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2233 
PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2234 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2235 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2236 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2237 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2238 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2239 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2240 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2241 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2242 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2243 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2244 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2245 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 2246 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2247 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2248 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2249 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2250 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2251 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2252 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2253 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2254 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2255 
PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2256 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2257 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2258 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2259 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2260 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2261 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2262 } 2263 } 2264 } 2265 PetscFunctionReturn(0); 2266 } 2267 2268 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) { 2269 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2270 2271 PetscFunctionBegin; 2272 info->block_size = 1.0; 2273 info->nz_allocated = mumps->id.INFOG(20); 2274 info->nz_used = mumps->id.INFOG(20); 2275 info->nz_unneeded = 0.0; 2276 info->assemblies = 0.0; 2277 info->mallocs = 0.0; 2278 info->memory = 0.0; 2279 info->fill_ratio_given = 0; 2280 info->fill_ratio_needed = 0; 2281 info->factor_mallocs = 0; 2282 PetscFunctionReturn(0); 2283 } 2284 2285 /* -------------------------------------------------------------------------------------------*/ 2286 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) { 2287 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2288 const PetscScalar *arr; 2289 const PetscInt *idxs; 2290 PetscInt size, i; 2291 2292 PetscFunctionBegin; 2293 PetscCall(ISGetLocalSize(is, &size)); 2294 /* Schur complement matrix */ 2295 PetscCall(MatDestroy(&F->schur)); 2296 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2297 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2298 mumps->id.schur = (MumpsScalar *)arr; 2299 mumps->id.size_schur = size; 2300 mumps->id.schur_lld = size; 2301 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2302 if (mumps->sym == 1) { PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); } 2303 2304 /* MUMPS expects Fortran style indices */ 2305 PetscCall(PetscFree(mumps->id.listvar_schur)); 2306 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2307 PetscCall(ISGetIndices(is, &idxs)); 2308 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2309 PetscCall(ISRestoreIndices(is, &idxs)); 2310 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2311 mumps->id.ICNTL(26) = -1; 2312 PetscFunctionReturn(0); 2313 } 2314 2315 /* -------------------------------------------------------------------------------------------*/ 2316 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) { 2317 Mat St; 2318 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2319 PetscScalar *array; 2320 #if defined(PETSC_USE_COMPLEX) 2321 
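/* in complex builds MUMPS stores each Schur entry as separate real/imaginary parts; build i = sqrt(-1) once so entries can be reassembled below as schur[k].r + im*schur[k].i */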
PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2322 #endif 2323 2324 PetscFunctionBegin; 2325 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2326 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2327 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2328 PetscCall(MatSetType(St, MATDENSE)); 2329 PetscCall(MatSetUp(St)); 2330 PetscCall(MatDenseGetArray(St, &array)); 2331 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2332 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2333 PetscInt i, j, N = mumps->id.size_schur; 2334 for (i = 0; i < N; i++) { 2335 for (j = 0; j < N; j++) { 2336 #if !defined(PETSC_USE_COMPLEX) 2337 PetscScalar val = mumps->id.schur[i * N + j]; 2338 #else 2339 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2340 #endif 2341 array[j * N + i] = val; 2342 } 2343 } 2344 } else { /* stored by columns */ 2345 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2346 } 2347 } else { /* either full or lower-triangular (not packed) */ 2348 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2349 PetscInt i, j, N = mumps->id.size_schur; 2350 for (i = 0; i < N; i++) { 2351 for (j = i; j < N; j++) { 2352 #if !defined(PETSC_USE_COMPLEX) 2353 PetscScalar val = mumps->id.schur[i * N + j]; 2354 #else 2355 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2356 #endif 2357 array[i * N + j] = val; 2358 array[j * N + i] = val; 2359 } 2360 } 2361 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2362 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2363 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2364 PetscInt i, j, N = mumps->id.size_schur; 2365 for (i = 0; i < N; i++) { 2366 for (j = 0; j < i + 1; j++) { 2367 #if !defined(PETSC_USE_COMPLEX) 2368 PetscScalar val = mumps->id.schur[i * N + j]; 2369 #else 2370 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2371 #endif 2372 array[i * N + j] = val; 2373 array[j * N + i] = val; 2374 } 2375 } 2376 } 2377 } 2378 PetscCall(MatDenseRestoreArray(St, &array)); 2379 *S = St; 2380 PetscFunctionReturn(0); 2381 } 2382 2383 /* -------------------------------------------------------------------------------------------*/ 2384 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) { 2385 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2386 2387 PetscFunctionBegin; 2388 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2389 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2390 for (i = 0; i < nICNTL_pre; ++i) 2391 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? 
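ICNTL_pre is laid out as [count, icntl_1, ival_1, icntl_2, ival_2, ...], so cached pair k lives at entries 1+2k and 2+2k.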
*/ 2392 if (i == nICNTL_pre) { /* not already cached */ 2393 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2394 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2395 mumps->ICNTL_pre[0]++; 2396 } 2397 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2398 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2399 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2400 PetscFunctionReturn(0); 2401 } 2402 2403 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) { 2404 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2405 2406 PetscFunctionBegin; 2407 *ival = mumps->id.ICNTL(icntl); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 /*@ 2412 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2413 2414 Logically Collective on Mat 2415 2416 Input Parameters: 2417 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2418 . icntl - index of MUMPS parameter array ICNTL() 2419 - ival - value of MUMPS ICNTL(icntl) 2420 2421 Options Database: 2422 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2423 2424 Level: beginner 2425 2426 References: 2427 . * - MUMPS Users' Guide 2428 2429 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2430 @*/ 2431 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) { 2432 PetscFunctionBegin; 2433 PetscValidType(F, 1); 2434 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2435 PetscValidLogicalCollectiveInt(F, icntl, 2); 2436 PetscValidLogicalCollectiveInt(F, ival, 3); 2437 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2438 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2439 PetscFunctionReturn(0); 2440 } 2441 2442 /*@ 2443 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2444 2445 Logically Collective on Mat 2446 2447 Input Parameters: 2448 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2449 - icntl - index of MUMPS parameter array ICNTL() 2450 2451 Output Parameter: 2452 . ival - value of MUMPS ICNTL(icntl) 2453 2454 Level: beginner 2455 2456 References: 2457 . 
* - MUMPS Users' Guide 2458 2459 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2460 @*/ 2461 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) { 2462 PetscFunctionBegin; 2463 PetscValidType(F, 1); 2464 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2465 PetscValidLogicalCollectiveInt(F, icntl, 2); 2466 PetscValidIntPointer(ival, 3); 2467 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2468 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2469 PetscFunctionReturn(0); 2470 } 2471 2472 /* -------------------------------------------------------------------------------------------*/ 2473 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) { 2474 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2475 2476 PetscFunctionBegin; 2477 if (mumps->id.job == JOB_NULL) { 2478 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2479 for (i = 0; i < nCNTL_pre; ++i) 2480 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2481 if (i == nCNTL_pre) { 2482 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2483 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2484 mumps->CNTL_pre[0]++; 2485 } 2486 mumps->CNTL_pre[1 + 2 * i] = icntl; 2487 mumps->CNTL_pre[2 + 2 * i] = val; 2488 } else mumps->id.CNTL(icntl) = val; 2489 PetscFunctionReturn(0); 2490 } 2491 2492 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) { 2493 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2494 2495 PetscFunctionBegin; 2496 *val = mumps->id.CNTL(icntl); 2497 PetscFunctionReturn(0); 2498 } 2499 2500 /*@ 2501 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2502 2503 Logically Collective on Mat 2504 2505 Input Parameters: 2506 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2507 . icntl - index of MUMPS parameter array CNTL() 2508 - val - value of MUMPS CNTL(icntl) 2509 2510 Options Database: 2511 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2512 2513 Level: beginner 2514 2515 References: 2516 . * - MUMPS Users' Guide 2517 2518 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2519 @*/ 2520 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) { 2521 PetscFunctionBegin; 2522 PetscValidType(F, 1); 2523 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2524 PetscValidLogicalCollectiveInt(F, icntl, 2); 2525 PetscValidLogicalCollectiveReal(F, val, 3); 2526 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2527 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2528 PetscFunctionReturn(0); 2529 } 2530 2531 /*@ 2532 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2533 2534 Logically Collective on Mat 2535 2536 Input Parameters: 2537 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2538 - icntl - index of MUMPS parameter array CNTL() 2539 2540 Output Parameter: 2541 . 
val - value of MUMPS CNTL(icntl) 2542 2543 Level: beginner 2544 2545 References: 2546 . * - MUMPS Users' Guide 2547 2548 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2549 @*/ 2550 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) { 2551 PetscFunctionBegin; 2552 PetscValidType(F, 1); 2553 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2554 PetscValidLogicalCollectiveInt(F, icntl, 2); 2555 PetscValidRealPointer(val, 3); 2556 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2557 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) { 2562 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2563 2564 PetscFunctionBegin; 2565 *info = mumps->id.INFO(icntl); 2566 PetscFunctionReturn(0); 2567 } 2568 2569 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) { 2570 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2571 2572 PetscFunctionBegin; 2573 *infog = mumps->id.INFOG(icntl); 2574 PetscFunctionReturn(0); 2575 } 2576 2577 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) { 2578 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2579 2580 PetscFunctionBegin; 2581 *rinfo = mumps->id.RINFO(icntl); 2582 PetscFunctionReturn(0); 2583 } 2584 2585 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) { 2586 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2587 2588 PetscFunctionBegin; 2589 *rinfog = mumps->id.RINFOG(icntl); 2590 PetscFunctionReturn(0); 2591 } 2592 2593 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) { 2594 Mat Bt = NULL, Btseq = NULL; 2595 PetscBool flg; 2596 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2597 PetscScalar *aa; 2598 PetscInt spnr, *ia, *ja, M, nrhs; 2599 2600 PetscFunctionBegin; 2601 PetscValidPointer(spRHS, 2); 2602 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEMAT, &flg)); 2603 if (flg) { 2604 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2605 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2606 2607 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2608 2609 if (mumps->petsc_size > 1) { 2610 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2611 Btseq = b->A; 2612 } else { 2613 Btseq = Bt; 2614 } 2615 2616 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2617 mumps->id.nrhs = nrhs; 2618 mumps->id.lrhs = M; 2619 mumps->id.rhs = NULL; 2620 2621 if (!mumps->myid) { 2622 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2623 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2624 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2625 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2626 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2627 } else { 2628 mumps->id.irhs_ptr = NULL; 2629 mumps->id.irhs_sparse = NULL; 2630 mumps->id.nz_rhs = 0; 2631 mumps->id.rhs_sparse = NULL; 2632 } 2633 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2634 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2635 2636 /* solve phase */ 2637 
/*-------------*/ 2638 mumps->id.job = JOB_SOLVE; 2639 PetscMUMPS_c(mumps); 2640 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 2641 2642 if (!mumps->myid) { 2643 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2644 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2645 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2646 } 2647 PetscFunctionReturn(0); 2648 } 2649 2650 /*@ 2651 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2652 2653 Logically Collective on Mat 2654 2655 Input Parameters: 2656 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2657 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2658 2659 Output Parameter: 2660 . spRHS - requested entries of inverse of A 2661 2662 Level: beginner 2663 2664 References: 2665 . * - MUMPS Users' Guide 2666 2667 .seealso: `MatGetFactor()`, `MatCreateTranspose()` 2668 @*/ 2669 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) { 2670 PetscFunctionBegin; 2671 PetscValidType(F, 1); 2672 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2673 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2674 PetscFunctionReturn(0); 2675 } 2676 2677 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) { 2678 Mat spRHS; 2679 2680 PetscFunctionBegin; 2681 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 2682 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 2683 PetscCall(MatDestroy(&spRHS)); 2684 PetscFunctionReturn(0); 2685 } 2686 2687 /*@ 2688 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2689 2690 Logically Collective on Mat 2691 2692 Input Parameters: 2693 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2694 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2695 2696 Output Parameter: 2697 . spRHST - requested entries of inverse of A^T 2698 2699 Level: beginner 2700 2701 References: 2702 . * - MUMPS Users' Guide 2703 2704 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2705 @*/ 2706 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) { 2707 PetscBool flg; 2708 2709 PetscFunctionBegin; 2710 PetscValidType(F, 1); 2711 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2712 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 2713 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 2714 2715 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 2716 PetscFunctionReturn(0); 2717 } 2718 2719 /*@ 2720 MatMumpsGetInfo - Get MUMPS parameter INFO() 2721 2722 Logically Collective on Mat 2723 2724 Input Parameters: 2725 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2726 - icntl - index of MUMPS parameter array INFO() 2727 2728 Output Parameter: 2729 . ival - value of MUMPS INFO(icntl) 2730 2731 Level: beginner 2732 2733 References: 2734 . 
* - MUMPS Users' Guide 2735 2736 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2737 @*/ 2738 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) { 2739 PetscFunctionBegin; 2740 PetscValidType(F, 1); 2741 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2742 PetscValidIntPointer(ival, 3); 2743 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2744 PetscFunctionReturn(0); 2745 } 2746 2747 /*@ 2748 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2749 2750 Logically Collective on Mat 2751 2752 Input Parameters: 2753 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2754 - icntl - index of MUMPS parameter array INFOG() 2755 2756 Output Parameter: 2757 . ival - value of MUMPS INFOG(icntl) 2758 2759 Level: beginner 2760 2761 References: 2762 . * - MUMPS Users' Guide 2763 2764 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2765 @*/ 2766 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) { 2767 PetscFunctionBegin; 2768 PetscValidType(F, 1); 2769 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2770 PetscValidIntPointer(ival, 3); 2771 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2772 PetscFunctionReturn(0); 2773 } 2774 2775 /*@ 2776 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2777 2778 Logically Collective on Mat 2779 2780 Input Parameters: 2781 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2782 - icntl - index of MUMPS parameter array RINFO() 2783 2784 Output Parameter: 2785 . val - value of MUMPS RINFO(icntl) 2786 2787 Level: beginner 2788 2789 References: 2790 . * - MUMPS Users' Guide 2791 2792 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 2793 @*/ 2794 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) { 2795 PetscFunctionBegin; 2796 PetscValidType(F, 1); 2797 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2798 PetscValidRealPointer(val, 3); 2799 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2800 PetscFunctionReturn(0); 2801 } 2802 2803 /*@ 2804 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2805 2806 Logically Collective on Mat 2807 2808 Input Parameters: 2809 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2810 - icntl - index of MUMPS parameter array RINFOG() 2811 2812 Output Parameter: 2813 . val - value of MUMPS RINFOG(icntl) 2814 2815 Level: beginner 2816 2817 References: 2818 . 
* - MUMPS Users' Guide 2819 2820 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 2821 @*/ 2822 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) { 2823 PetscFunctionBegin; 2824 PetscValidType(F, 1); 2825 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2826 PetscValidRealPointer(val, 3); 2827 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2828 PetscFunctionReturn(0); 2829 } 2830 2831 /*MC 2832 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2833 distributed and sequential matrices via the external package MUMPS. 2834 2835 Works with MATAIJ and MATSBAIJ matrices 2836 2837 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2838 2839 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2840 2841 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2842 2843 Options Database Keys: 2844 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2845 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 2846 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2847 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2848 . -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7) 2849 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2850 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2851 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2852 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2853 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2854 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2855 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2856 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2857 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 2858 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2859 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 2860 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2861 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor 2862 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2863 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2864 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 2865 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 2866 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2867 .
-mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2868 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2869 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2870 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2871 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2872 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2873 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2874 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2875 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2876 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2877 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2878 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2879 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2880 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2881 2882 Level: beginner 2883 2884 Notes: 2885 MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it will error if the matrix is Hermitian. 2886 2887 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 2888 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 2889 2890 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2891 $ KSPGetPC(ksp,&pc); 2892 $ PCFactorGetMatrix(pc,&mat); 2893 $ MatMumpsGetInfo(mat,....); 2894 $ MatMumpsGetInfog(mat,....); etc. 2895 Alternatively, you can run with -ksp_error_if_not_converged and the program will stop and print the information in the error message. 2896 2897 Using MUMPS with 64-bit integers 2898 MUMPS provides 64-bit integer support in two build modes: 2899 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 2900 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 2901 2902 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 2903 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 2904 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 2905 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 2906 2907 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 2908 2909 Two modes to run MUMPS/PETSc with OpenMP 2910 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2911 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2912 2913 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2914 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2915 2916 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2917 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2918 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2919 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2920 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 2921 2922 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2923 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2924 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2925 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (whose binding might have been set 2926 by CPU binding policies in job scripts) and make those CPUs available to the master so that the OMP threads spawned by MUMPS can run on them. 2927 On a multi-socket compute node, MPI rank mapping is an issue. Continuing the example above, suppose your compute node has two sockets: 2928 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind 2929 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores on socket 0 and half 2930 of the cores on socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, the 2931 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2932 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2933 examine the mapping result. 2934 2935 PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2936 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2937 calls omp_set_num_threads(m) internally before calling MUMPS. 2938 2939 References: 2940 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2941 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
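   Example of setting a MUMPS option from code, as an alternative to the options database. This is a minimal sketch only: it assumes ksp is an existing KSP with its operators already set, that PC pc and Mat F have been declared, and it omits error checking:
$    KSPGetPC(ksp,&pc);
$    PCSetType(pc,PCLU);
$    PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
$    PCFactorSetUpMatSolverType(pc);
$    PCFactorGetMatrix(pc,&F);
$    MatMumpsSetIcntl(F,14,30);
   Here ICNTL(14) is the percentage increase in the estimated working space, as listed above; the equivalent options-database form is -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30.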
2942 2943 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 2944 2945 M*/ 2946 2947 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) { 2948 PetscFunctionBegin; 2949 *type = MATSOLVERMUMPS; 2950 PetscFunctionReturn(0); 2951 } 2952 2953 /* MatGetFactor for Seq and MPI AIJ matrices */ 2954 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) { 2955 Mat B; 2956 Mat_MUMPS *mumps; 2957 PetscBool isSeqAIJ; 2958 PetscMPIInt size; 2959 2960 PetscFunctionBegin; 2961 #if defined(PETSC_USE_COMPLEX) 2962 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 2963 #endif 2964 /* Create the factorization matrix */ 2965 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 2966 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2967 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2968 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 2969 PetscCall(MatSetUp(B)); 2970 2971 PetscCall(PetscNewLog(B, &mumps)); 2972 2973 B->ops->view = MatView_MUMPS; 2974 B->ops->getinfo = MatGetInfo_MUMPS; 2975 2976 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 2977 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 2978 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 2979 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 2980 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 2981 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 2982 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 2983 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 2984 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 2985 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 2986 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 2987 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 2988 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 2989 2990 if (ftype == MAT_FACTOR_LU) { 2991 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2992 B->factortype = MAT_FACTOR_LU; 2993 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2994 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2995 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 2996 mumps->sym = 0; 2997 } else { 2998 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2999 B->factortype = MAT_FACTOR_CHOLESKY; 3000 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3001 else 
mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3002 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3003 #if defined(PETSC_USE_COMPLEX) 3004 mumps->sym = 2; 3005 #else 3006 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3007 else mumps->sym = 2; 3008 #endif 3009 } 3010 3011 /* set solvertype */ 3012 PetscCall(PetscFree(B->solvertype)); 3013 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3014 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3015 if (size == 1) { 3016 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3017 B->canuseordering = PETSC_TRUE; 3018 } 3019 B->ops->destroy = MatDestroy_MUMPS; 3020 B->data = (void *)mumps; 3021 3022 *F = B; 3023 mumps->id.job = JOB_NULL; 3024 mumps->ICNTL_pre = NULL; 3025 mumps->CNTL_pre = NULL; 3026 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3027 PetscFunctionReturn(0); 3028 } 3029 3030 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3031 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) { 3032 Mat B; 3033 Mat_MUMPS *mumps; 3034 PetscBool isSeqSBAIJ; 3035 PetscMPIInt size; 3036 3037 PetscFunctionBegin; 3038 #if defined(PETSC_USE_COMPLEX) 3039 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3040 #endif 3041 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3042 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3043 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3044 PetscCall(MatSetUp(B)); 3045 3046 PetscCall(PetscNewLog(B, &mumps)); 3047 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3048 if (isSeqSBAIJ) { 3049 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3050 } else { 3051 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3052 } 3053 3054 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3055 B->ops->view = MatView_MUMPS; 3056 B->ops->getinfo = MatGetInfo_MUMPS; 3057 3058 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3059 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3060 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3061 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3062 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3063 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3064 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3065 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3066 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3067 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3068 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3069 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3070 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3071 3072 B->factortype = MAT_FACTOR_CHOLESKY; 3073 #if defined(PETSC_USE_COMPLEX) 3074 mumps->sym = 2; 3075 #else 3076 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3077 else mumps->sym = 2; 3078 #endif 3079 3080 /* set solvertype */ 3081 PetscCall(PetscFree(B->solvertype)); 3082 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3083 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3084 if (size == 1) { 3085 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3086 B->canuseordering = PETSC_TRUE; 3087 } 3088 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3089 B->ops->destroy = MatDestroy_MUMPS; 3090 B->data = (void *)mumps; 3091 3092 *F = B; 3093 mumps->id.job = JOB_NULL; 3094 mumps->ICNTL_pre = NULL; 3095 mumps->CNTL_pre = NULL; 3096 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3097 PetscFunctionReturn(0); 3098 } 3099 3100 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) { 3101 Mat B; 3102 Mat_MUMPS *mumps; 3103 PetscBool isSeqBAIJ; 3104 PetscMPIInt size; 3105 3106 PetscFunctionBegin; 3107 /* Create the factorization matrix */ 3108 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3109 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3110 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3111 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3112 PetscCall(MatSetUp(B)); 3113 3114 PetscCall(PetscNewLog(B, &mumps)); 3115 if (ftype == MAT_FACTOR_LU) { 3116 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3117 B->factortype = MAT_FACTOR_LU; 3118 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3119 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3120 mumps->sym = 0; 3121 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3122 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3123 3124 B->ops->view = MatView_MUMPS; 3125 B->ops->getinfo = MatGetInfo_MUMPS; 3126 3127 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3128 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3129 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3130 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3131 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3132 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3133 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3134 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3135 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3136 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3137 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", 
MatMumpsGetRinfog_MUMPS)); 3138 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3139 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3140 3141 /* set solvertype */ 3142 PetscCall(PetscFree(B->solvertype)); 3143 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3144 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3145 if (size == 1) { 3146 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3147 B->canuseordering = PETSC_TRUE; 3148 } 3149 B->ops->destroy = MatDestroy_MUMPS; 3150 B->data = (void *)mumps; 3151 3152 *F = B; 3153 mumps->id.job = JOB_NULL; 3154 mumps->ICNTL_pre = NULL; 3155 mumps->CNTL_pre = NULL; 3156 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3157 PetscFunctionReturn(0); 3158 } 3159 3160 /* MatGetFactor for Seq and MPI SELL matrices */ 3161 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) { 3162 Mat B; 3163 Mat_MUMPS *mumps; 3164 PetscBool isSeqSELL; 3165 PetscMPIInt size; 3166 3167 PetscFunctionBegin; 3168 /* Create the factorization matrix */ 3169 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3170 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3171 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3172 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3173 PetscCall(MatSetUp(B)); 3174 3175 PetscCall(PetscNewLog(B, &mumps)); 3176 3177 B->ops->view = MatView_MUMPS; 3178 B->ops->getinfo = MatGetInfo_MUMPS; 3179 3180 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3181 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3182 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3183 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3184 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3185 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3186 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3187 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3188 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3189 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3190 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3191 3192 if (ftype == MAT_FACTOR_LU) { 3193 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3194 B->factortype = MAT_FACTOR_LU; 3195 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3196 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3197 mumps->sym = 0; 3198 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3199 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3200 3201 /* set solvertype */ 3202 PetscCall(PetscFree(B->solvertype)); 3203 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, 
&B->solvertype)); 3204 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3205 if (size == 1) { 3206 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3207 B->canuseordering = PETSC_TRUE; 3208 } 3209 B->ops->destroy = MatDestroy_MUMPS; 3210 B->data = (void *)mumps; 3211 3212 *F = B; 3213 mumps->id.job = JOB_NULL; 3214 mumps->ICNTL_pre = NULL; 3215 mumps->CNTL_pre = NULL; 3216 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3217 PetscFunctionReturn(0); 3218 } 3219 3220 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) { 3221 PetscFunctionBegin; 3222 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3223 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3224 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3225 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3226 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3227 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3228 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3229 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3230 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3231 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3232 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3233 PetscFunctionReturn(0); 3234 } 3235