/*
  Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
#error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command line macro used to build MUMPS in full 64-bit mode */
#error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;
  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks.
   Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
        MUMPS_c(&mumps->id); \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
      } \
      PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
      /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
         to each process, so we only Bcast info[1], an error code, and leave the others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { \
    MUMPS_c(&mumps->id); \
  } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]

typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;             /* number of nonzeros.
                                     The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscInt64  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller under which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
  PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices.
                                  Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

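    /* copy the result back into B, which wraps mumps->id.redrhs, so that (as stated above) the Schur solution is returned in redrhs */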
    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
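
  For example (an illustrative sketch, not tied to any one of the routines below): with shift = 1 (Fortran-style indices),
  a sequential AIJ matrix with entries (0,0) = a, (0,1) = b, (1,1) = c produces
    nnz = 3,  r = {1, 1, 2},  c = {1, 2, 2},  v = {a, b, c}.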

*/

PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  mumps->val = (PetscScalar *)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscInt64     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = a->sliidx[a->totalslices];
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  M          = A->rmap->N / bs;
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai = aa->i;
    aj = aa->j;
    nz = bs2 * aa->nz;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscInt64      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool isset, hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscInt64 alloc_size = aa->nz * bs2;

    PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
    if (bs > 1) {
      PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs > 1) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (ajj[j] > i || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
              }
              val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j;
  const PetscScalar *av, *v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
  PetscBool          missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i + 1] - adiag[i];
        }
      }
    } else {
      for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) val[nz++] = v1[j];
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) val[nz++] = v1[j];
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
  PetscInt           bs;
  PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A, &bs));
  rstart = A->rmap->rstart;
  ai     = aa->i;
  aj     = aa->j;
  bi     = bb->i;
  bj     = bb->j;
  av     = aa->a;
  bv     = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    /* can not decide the exact mumps->nnz now because of the SBAIJ */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i] * bs2;
    v2     = bv + bi[i] * bs2;

    if (bs > 1) {
      /* A-part */
      for (j = 0; j < countA; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (rstart + ajj[j] * bs > irow || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs];
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
            }
            val[jj++] = v2[j * bs2 + m + k * bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j = 0; j < countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  mumps->nnz = jj;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscInt64         rstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)(Ad)->data;
  bb = (Mat_SeqAIJ *)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)(Ad)->data;
  bb = (Mat_SeqAIJ *)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i = 0; i < m; i++) {
      nza += (ai[i + 1] - adiag[i]);
      countB = bi[i + 1] - bi[i];
      bjj    = bj + bi[i];
      for (j = 0; j < countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of upper triangular part of mat */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i + 1] - adiag[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscInt64     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ;
          MatInfo   info;

          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));

          if (chol) {
            if (r == c) {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
            } else {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)(maxnnz);

    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub  = mats[r][c];
        const PetscInt *ridx = rows_idx[r];
        PetscInt        rst;
        PetscSF         csf;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Import values to full COO */
        PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

        /* Direct map of rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
        for (PetscInt k = 0; k < mumps->nnz; k++) PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));

        /* Communicate column indices */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else {
          pjcns_w = (PetscInt *)(jcns + cumnnz); /* This cast is needed only to silence warnings for 64bit integers builds */
          PetscCall(PetscArraycpy(pjcns_w, mumps->jcn, mumps->nnz));
        }

        /* This could have been done with a single SF but it would have complicated the code a lot.
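           Instead, a small temporary SF is built per block below: its graph maps each local triple's column index (in pjcns_w) into the block's column layout, and the broadcast pulls the corresponding global column indices of the nest matrix (cols_idx[c]) back into pjcns_w.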
         */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscSFSetGraphLayout(csf, sub->cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cols_idx[c], pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cols_idx[c], pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Apply Fortran index shifting for columns */
        for (PetscInt k = 0; k < mumps->nnz; k++) PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
        mumps->val = oval + mumps->nest_vals_start[r * nc + c];
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(mats[r][c], shift, MAT_REUSE_MATRIX, mumps));
      }
    }
    mumps->val = oval;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor.
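   On return the routine has filled id.nrhs, id.nloc_rhs, id.lrhs_loc, id.irhs_loc and id.rhs_loc, i.e. the fields MUMPS reads when its distributed dense RHS feature (ICNTL(20) = 10) is used.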
 */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar *)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Re-allocate buffers when the current nrhs is bigger than any seen so far */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf.
                                                                      Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
                                   "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
                                   &cite2));

  if (A->factorerrortype) {
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscCall(VecSetInf(x));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b, &rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector */
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq, &array));
        mumps->id.rhs = (MumpsScalar *)array;
      }
    }
  } else { /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    PetscCall(VecCopy(b, x));
    PetscCall(VecGetArray(x, &array));
    mumps->id.rhs = (MumpsScalar *)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    second_solve = PETSC_TRUE;
    PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }
  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), so recreate scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
  }

  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      PetscCall(VecRestoreArrayRead(b, &rarray));
    } else if (!mumps->myid) {
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else PetscCall(VecRestoreArray(x, &array));

  PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatSolve_MUMPS(A, b, x));
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL;
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save;
  PetscScalar       *bray, *sol_loc, *sol_loc_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc;
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
    if (flgT) { /* input B is transpose of actual RHS matrix,
                   because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      PetscCall(MatTransposeGetMat(B, &Bt));
    } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B, &M, &nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt     spnr, *ia, *ja;
    PetscBool    second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X, &array));
    mumps->id.rhs = (MumpsScalar *)array;

    if (denseB) {
      /* copy B to X */
      PetscCall(MatDenseGetArrayRead(B, &rbray));
      PetscCall(PetscArraycpy(array, rbray, M * nrhs));
      PetscCall(MatDenseRestoreArrayRead(B, &rbray));
    } else { /* sparse B */
      PetscCall(MatSeqAIJGetArray(Bt, &aa));
      PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar *)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
    }
    /* solve phase */
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
    if (!denseB) { /* sparse B */
      PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
      PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
    PetscCall(MatDenseRestoreArray(X, &array));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* parallel case: MUMPS requires rhs B to be centralized on the host!
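     (unless the dense distributed RHS path is taken below: with ICNTL(20) = 10 each rank hands MUMPS its local rows of B directly)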
*/ 1488 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1489 1490 /* create msol_loc to hold mumps local solution */ 1491 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1492 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1493 1494 lsol_loc = mumps->id.lsol_loc; 1495 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1496 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1497 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1498 mumps->id.isol_loc = isol_loc; 1499 1500 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1501 1502 if (denseB) { 1503 if (mumps->ICNTL20 == 10) { 1504 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1505 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1506 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1507 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1508 PetscCall(MatGetLocalSize(B, &m, NULL)); 1509 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1510 } else { 1511 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1512 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1513 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1514 0, re-arrange B into desired order, which is a local operation. 1515 */ 1516 1517 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1518 /* wrap dense rhs matrix B into a vector v_mpi */ 1519 PetscCall(MatGetLocalSize(B, &m, NULL)); 1520 PetscCall(MatDenseGetArray(B, &bray)); 1521 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1522 PetscCall(MatDenseRestoreArray(B, &bray)); 1523 1524 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
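   A sketch of the alternative suggested in the TODO above (illustrative only; the rank-0 reordering into the column-major layout expected by MUMPS would still have to be done by hand):
       Vec        b0;
       VecScatter tozero;
       PetscCall(VecScatterCreateToZero(v_mpi, &tozero, &b0));
       PetscCall(VecScatterBegin(tozero, v_mpi, b0, INSERT_VALUES, SCATTER_FORWARD));
       PetscCall(VecScatterEnd(tozero, v_mpi, b0, INSERT_VALUES, SCATTER_FORWARD));
       PetscCall(VecScatterDestroy(&tozero));
       PetscCall(VecDestroy(&b0));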
*/ 1525 if (!mumps->myid) { 1526 PetscInt *idx; 1527 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1528 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1529 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1530 k = 0; 1531 for (proc = 0; proc < mumps->petsc_size; proc++) { 1532 for (j = 0; j < nrhs; j++) { 1533 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1534 } 1535 } 1536 1537 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1538 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1539 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1540 } else { 1541 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1542 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1543 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1544 } 1545 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1546 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1547 PetscCall(ISDestroy(&is_to)); 1548 PetscCall(ISDestroy(&is_from)); 1549 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1550 1551 if (!mumps->myid) { /* define rhs on the host */ 1552 PetscCall(VecGetArray(b_seq, &bray)); 1553 mumps->id.rhs = (MumpsScalar *)bray; 1554 PetscCall(VecRestoreArray(b_seq, &bray)); 1555 } 1556 } 1557 } else { /* sparse B */ 1558 b = (Mat_MPIAIJ *)Bt->data; 1559 1560 /* wrap dense X into a vector v_mpi */ 1561 PetscCall(MatGetLocalSize(X, &m, NULL)); 1562 PetscCall(MatDenseGetArray(X, &bray)); 1563 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1564 PetscCall(MatDenseRestoreArray(X, &bray)); 1565 1566 if (!mumps->myid) { 1567 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1568 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1569 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1570 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1571 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1572 } else { 1573 mumps->id.irhs_ptr = NULL; 1574 mumps->id.irhs_sparse = NULL; 1575 mumps->id.nz_rhs = 0; 1576 mumps->id.rhs_sparse = NULL; 1577 } 1578 } 1579 1580 /* solve phase */ 1581 mumps->id.job = JOB_SOLVE; 1582 PetscMUMPS_c(mumps); 1583 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1584 1585 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1586 PetscCall(MatDenseGetArray(X, &array)); 1587 PetscCall(VecPlaceArray(v_mpi, array)); 1588 1589 /* create scatter scat_sol */ 1590 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1591 /* iidx: index for scatter mumps solution to petsc X */ 1592 1593 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1594 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1595 for (i = 0; i < lsol_loc; i++) { 1596 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1597 1598 for (proc = 0; proc < mumps->petsc_size; proc++) { 1599 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1600 myrstart = rstart[proc]; 1601 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1602 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1603 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1604 break; 1605 } 1606 } 1607 1608 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1609 } 1610 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1611 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1612 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1613 PetscCall(ISDestroy(&is_from)); 1614 PetscCall(ISDestroy(&is_to)); 1615 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1616 PetscCall(MatDenseRestoreArray(X, &array)); 1617 1618 /* free spaces */ 1619 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1620 mumps->id.isol_loc = isol_loc_save; 1621 1622 PetscCall(PetscFree2(sol_loc, isol_loc)); 1623 PetscCall(PetscFree(idxx)); 1624 PetscCall(VecDestroy(&msol_loc)); 1625 PetscCall(VecDestroy(&v_mpi)); 1626 if (!denseB) { 1627 if (!mumps->myid) { 1628 b = (Mat_MPIAIJ *)Bt->data; 1629 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1630 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1631 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1632 } 1633 } else { 1634 if (mumps->ICNTL20 == 0) { 1635 PetscCall(VecDestroy(&b_seq)); 1636 PetscCall(VecScatterDestroy(&scat_rhs)); 1637 } 1638 } 1639 PetscCall(VecScatterDestroy(&scat_sol)); 1640 PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3))); 1641 PetscFunctionReturn(PETSC_SUCCESS); 1642 } 1643 1644 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1645 { 1646 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1647 PetscMUMPSInt oldvalue = mumps->id.ICNTL(9); 1648 1649 PetscFunctionBegin; 1650 mumps->id.ICNTL(9) = 0; 1651 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1652 mumps->id.ICNTL(9) = oldvalue; 1653 PetscFunctionReturn(PETSC_SUCCESS); 1654 } 1655 1656 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1657 { 1658 PetscBool flg; 1659 Mat B; 1660 1661 PetscFunctionBegin; 1662 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1663 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1664 1665 /* Create B=Bt^T that uses Bt's data structure */ 1666 PetscCall(MatCreateTranspose(Bt, &B)); 1667 1668 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1669 PetscCall(MatDestroy(&B)); 1670 PetscFunctionReturn(PETSC_SUCCESS); 1671 } 1672 1673 #if !defined(PETSC_USE_COMPLEX) 1674 /* 1675 input: 1676 F: numeric factor 1677 output: 1678 nneg: total number of negative pivots 1679 nzero: total number of zero pivots 1680 npos: (global dimension of F) - nneg - nzero 1681 */ 1682 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1683 { 1684 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1685 PetscMPIInt size; 1686 1687 PetscFunctionBegin; 1688 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1689 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility 
to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1690 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1691 1692 if (nneg) *nneg = mumps->id.INFOG(12); 1693 if (nzero || npos) { 1694 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1695 if (nzero) *nzero = mumps->id.INFOG(28); 1696 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1697 } 1698 PetscFunctionReturn(PETSC_SUCCESS); 1699 } 1700 #endif 1701 1702 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1703 { 1704 PetscInt i, nreqs; 1705 PetscMUMPSInt *irn, *jcn; 1706 PetscMPIInt count; 1707 PetscInt64 totnnz, remain; 1708 const PetscInt osize = mumps->omp_comm_size; 1709 PetscScalar *val; 1710 1711 PetscFunctionBegin; 1712 if (osize > 1) { 1713 if (reuse == MAT_INITIAL_MATRIX) { 1714 /* master first gathers counts of nonzeros to receive */ 1715 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1716 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1717 1718 /* Then each computes number of send/recvs */ 1719 if (mumps->is_omp_master) { 1720 /* Start from 1 since self communication is not done in MPI */ 1721 nreqs = 0; 1722 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1723 } else { 1724 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1725 } 1726 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1727 1728 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1729 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1730 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1731 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
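   For illustration: with recvcount[i] = 5,000,000,000 and PETSC_MPI_INT_MAX = 2,147,483,647, the loop below posts three receives per array (irn, jcn, val), of lengths 2,147,483,647, 2,147,483,647 and 705,032,706.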
1732 */ 1733 nreqs = 0; /* counter for actual send/recvs */ 1734 if (mumps->is_omp_master) { 1735 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1736 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1737 PetscCall(PetscMalloc1(totnnz, &val)); 1738 1739 /* Self communication */ 1740 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1741 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1742 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1743 1744 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1745 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1746 PetscCall(PetscFree(mumps->val_alloc)); 1747 mumps->nnz = totnnz; 1748 mumps->irn = irn; 1749 mumps->jcn = jcn; 1750 mumps->val = mumps->val_alloc = val; 1751 1752 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1753 jcn += mumps->recvcount[0]; 1754 val += mumps->recvcount[0]; 1755 1756 /* Remote communication */ 1757 for (i = 1; i < osize; i++) { 1758 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1759 remain = mumps->recvcount[i] - count; 1760 while (count > 0) { 1761 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1762 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1763 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1764 irn += count; 1765 jcn += count; 1766 val += count; 1767 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1768 remain -= count; 1769 } 1770 } 1771 } else { 1772 irn = mumps->irn; 1773 jcn = mumps->jcn; 1774 val = mumps->val; 1775 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1776 remain = mumps->nnz - count; 1777 while (count > 0) { 1778 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1779 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1780 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1781 irn += count; 1782 jcn += count; 1783 val += count; 1784 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1785 remain -= count; 1786 } 1787 } 1788 } else { 1789 nreqs = 0; 1790 if (mumps->is_omp_master) { 1791 val = mumps->val + mumps->recvcount[0]; 1792 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1793 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1794 remain = mumps->recvcount[i] - count; 1795 while (count > 0) { 1796 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1797 val += count; 1798 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1799 remain -= count; 1800 } 1801 } 1802 } else { 1803 val = mumps->val; 1804 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1805 remain = mumps->nnz - count; 1806 while (count > 0) { 1807 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1808 val += count; 1809 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1810 remain -= count; 1811 } 1812 } 1813 } 1814 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1815 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1816 } 1817 PetscFunctionReturn(PETSC_SUCCESS); 1818 } 1819 1820 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const 
MatFactorInfo *info) 1821 { 1822 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1823 PetscBool isMPIAIJ; 1824 1825 PetscFunctionBegin; 1826 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* an empty matrix (INFOG(1)=-16 with INFO(2)=0) is not treated as an error */ 1827 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1828 PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1829 PetscFunctionReturn(PETSC_SUCCESS); 1830 } 1831 1832 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1833 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1834 1835 /* numerical factorization phase */ 1836 mumps->id.job = JOB_FACTNUMERIC; 1837 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1838 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 1839 } else { 1840 mumps->id.a_loc = (MumpsScalar *)mumps->val; 1841 } 1842 PetscMUMPS_c(mumps); 1843 if (mumps->id.INFOG(1) < 0) { 1844 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 1845 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1846 PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1847 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1848 } else if (mumps->id.INFOG(1) == -13) { 1849 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1850 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1851 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1852 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1853 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1854 } else { 1855 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1856 F->factorerrortype = MAT_FACTOR_OTHER; 1857 } 1858 } 1859 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " mumps->id.ICNTL(16):=%d", mumps->id.ICNTL(16)); 1860 1861 F->assembled = PETSC_TRUE; 1862 1863 if (F->schur) { /* reset Schur status to unfactored */ 1864 #if defined(PETSC_HAVE_CUDA) 1865 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1866 #endif 1867 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1868 mumps->id.ICNTL(19) = 2; 1869 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1870 } 1871 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1872 } 1873 1874 /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */ 1875 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1876 1877 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1878 if (mumps->petsc_size > 1) { 1879 PetscInt lsol_loc; 1880 PetscScalar *sol_loc; 1881 1882 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 1883 1884 /* distributed solution; create x_seq=sol_loc for repeated use */ 1885 if (mumps->x_seq) { 1886
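/* the factorization changed, so the previously created local solution storage and its scatter are stale; free them before they are recreated below with the new lsol_loc = INFO(23) */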
PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1887 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1888 PetscCall(VecDestroy(&mumps->x_seq)); 1889 } 1890 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1891 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1892 mumps->id.lsol_loc = lsol_loc; 1893 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1894 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1895 } 1896 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1897 PetscFunctionReturn(PETSC_SUCCESS); 1898 } 1899 1900 /* Sets MUMPS options from the options database */ 1901 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1902 { 1903 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1904 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1905 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1906 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1907 MumpsScalar *arr; 1908 1909 PetscFunctionBegin; 1910 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1911 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1912 PetscInt nthreads = 0; 1913 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1914 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1915 1916 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1917 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1918 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1919 1920 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1921 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1922 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1923 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1924 if (mumps->use_petsc_omp_support) { 1925 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 1926 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1927 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1928 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1929 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1930 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1931 #endif 1932 } else { 1933 mumps->omp_comm = PETSC_COMM_SELF; 1934 mumps->mumps_comm = mumps->petsc_comm; 1935 mumps->is_omp_master = PETSC_TRUE; 1936 } 1937 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1938 mumps->reqs = NULL; 1939 mumps->tag = 0; 1940 1941 if (mumps->mumps_comm != MPI_COMM_NULL) { 1942 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1943 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 1944 MPI_Comm comm; 1945 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 1946 mumps->mumps_comm = comm; 1947 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 1948 } 1949 1950 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1951 mumps->id.job = JOB_INIT; 1952 mumps->id.par = 1; /* host participates in the factorization and solve phases */ 1953 mumps->id.sym = mumps->sym; 1954 1955 size = mumps->id.size_schur; 1956 arr = mumps->id.schur; 1957 listvar_schur = mumps->id.listvar_schur; 1958 PetscMUMPS_c(mumps); 1959 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1960 /* restore cached ICNTL and CNTL values */ 1961 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 1962 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 1963 PetscCall(PetscFree(mumps->ICNTL_pre)); 1964 PetscCall(PetscFree(mumps->CNTL_pre)); 1965 1966 if (schur) { 1967 mumps->id.size_schur = size; 1968 mumps->id.schur_lld = size; 1969 mumps->id.schur = arr; 1970 mumps->id.listvar_schur = listvar_schur; 1971 if (mumps->petsc_size > 1) { 1972 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 1973 1974 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1975 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1976 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 1977 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 1978 } else { 1979 if (F->factortype == MAT_FACTOR_LU) { 1980 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1981 } else { 1982 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1983 } 1984 } 1985 mumps->id.ICNTL(26) = -1; 1986 } 1987 1988 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1989 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1990 */ 1991 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 1992 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 1993 1994 mumps->scat_rhs = NULL; 1995 mumps->scat_sol = NULL; 1996 1997 /* set PETSc-MUMPS default options - override MUMPS default */ 1998 mumps->id.ICNTL(3) = 0; 1999 mumps->id.ICNTL(4) = 0; 2000 if (mumps->petsc_size == 1) { 2001 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2002 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2003 } else { 2004 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2005 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2006 } 2007 } 2008 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2009 if (flg) mumps->id.ICNTL(1) = icntl; 2010 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2011 if (flg) mumps->id.ICNTL(2) = icntl; 2012 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2013 if (flg) mumps->id.ICNTL(3) = icntl; 2014 2015 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2016 if (flg) mumps->id.ICNTL(4) = icntl; 2017 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2018 2019 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2020 if (flg) mumps->id.ICNTL(6) = icntl; 2021 2022 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2023 if (flg) { 2024 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2025 mumps->id.ICNTL(7) = icntl; 2026 } 2027 2028 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2029 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2030 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2031 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2032 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2033 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2034 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2035 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2036 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2037 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2038 if (flg) { 2039 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2040 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2041 } 2042 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2043 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2044 PetscCall(MatDestroy(&F->schur)); 2045 PetscCall(MatMumpsResetSchur_Private(mumps)); 2046 } 2047 2048 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2049 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2050 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2051 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2052 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2053 In short, we could not use distributed RHS with MPICH until v4.0b1. 
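   If the distributed-RHS path needs to be avoided at run time, the centralized default can still be forced explicitly, e.g. with -mat_mumps_icntl_20 0 (this interface only accepts the values 0 and 10, see below).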
2054 */ 2055 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2056 mumps->ICNTL20 = 0; /* Centralized dense RHS */ 2057 #else 2058 mumps->ICNTL20 = 10; /* Distributed dense RHS */ 2059 #endif 2060 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2061 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2062 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2063 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2064 #endif 2065 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2066 2067 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2068 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2069 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2070 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ } 2071 2072 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2073 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix is present", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2074 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2075 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2076 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2077 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2078 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2079 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2080 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None",
mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2081 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2082 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2083 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2084 2085 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2086 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2087 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2088 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2089 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2090 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2091 2092 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2093 2094 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2095 if (ninfo) { 2096 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo); 2097 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2098 mumps->ninfo = ninfo; 2099 for (i = 0; i < ninfo; i++) { 2100 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]); 2101 mumps->info[i] = info[i]; 2102 } 2103 } 2104 PetscOptionsEnd(); 2105 PetscFunctionReturn(PETSC_SUCCESS); 2106 } 2107 2108 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2109 { 2110 PetscFunctionBegin; 2111 if (mumps->id.INFOG(1) < 0) { 2112 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1)); 2113 if (mumps->id.INFOG(1) == -6) { 2114 PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2115 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2116 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2117 PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2118 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2119 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { 2120 PetscCall(PetscInfo(F, "Empty matrix\n")); 2121 } else { 2122 PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2123 F->factorerrortype = MAT_FACTOR_OTHER; 2124 } 2125 } 2126 PetscFunctionReturn(PETSC_SUCCESS); 2127 } 2128 2129 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS
r, IS c, const MatFactorInfo *info) 2130 { 2131 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2132 Vec b; 2133 const PetscInt M = A->rmap->N; 2134 2135 PetscFunctionBegin; 2136 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2137 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2138 PetscFunctionReturn(PETSC_SUCCESS); 2139 } 2140 2141 /* Set MUMPS options from the options database */ 2142 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2143 2144 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2145 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2146 2147 /* analysis phase */ 2148 mumps->id.job = JOB_FACTSYMBOLIC; 2149 mumps->id.n = M; 2150 switch (mumps->id.ICNTL(18)) { 2151 case 0: /* centralized assembled matrix input */ 2152 if (!mumps->myid) { 2153 mumps->id.nnz = mumps->nnz; 2154 mumps->id.irn = mumps->irn; 2155 mumps->id.jcn = mumps->jcn; 2156 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2157 if (r) { 2158 mumps->id.ICNTL(7) = 1; 2159 if (!mumps->myid) { 2160 const PetscInt *idx; 2161 PetscInt i; 2162 2163 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2164 PetscCall(ISGetIndices(r, &idx)); 2165 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2166 PetscCall(ISRestoreIndices(r, &idx)); 2167 } 2168 } 2169 } 2170 break; 2171 case 3: /* distributed assembled matrix input (size>1) */ 2172 mumps->id.nnz_loc = mumps->nnz; 2173 mumps->id.irn_loc = mumps->irn; 2174 mumps->id.jcn_loc = mumps->jcn; 2175 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2176 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2177 PetscCall(MatCreateVecs(A, NULL, &b)); 2178 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2179 PetscCall(VecDestroy(&b)); 2180 } 2181 break; 2182 } 2183 PetscMUMPS_c(mumps); 2184 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2185 2186 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2187 F->ops->solve = MatSolve_MUMPS; 2188 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2189 F->ops->matsolve = MatMatSolve_MUMPS; 2190 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2191 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2192 2193 mumps->matstruc = SAME_NONZERO_PATTERN; 2194 PetscFunctionReturn(PETSC_SUCCESS); 2195 } 2196 2197 /* Note the Petsc r and c permutations are ignored */ 2198 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2199 { 2200 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2201 Vec b; 2202 const PetscInt M = A->rmap->N; 2203 2204 PetscFunctionBegin; 2205 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2206 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2207 PetscFunctionReturn(PETSC_SUCCESS); 2208 } 2209 2210 /* Set MUMPS options from the options database */ 2211 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2212 2213 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2214 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2215 2216 /* analysis phase */ 2217 mumps->id.job = JOB_FACTSYMBOLIC; 2218 mumps->id.n = M; 2219 switch (mumps->id.ICNTL(18)) { 2220 case 0: /* centralized assembled matrix input */ 2221 if (!mumps->myid) { 2222 mumps->id.nnz = mumps->nnz; 2223 mumps->id.irn = mumps->irn; 2224 mumps->id.jcn = mumps->jcn; 2225 if (mumps->id.ICNTL(6) > 1) mumps->id.a 
= (MumpsScalar *)mumps->val; 2226 } 2227 break; 2228 case 3: /* distributed assembled matrix input (size>1) */ 2229 mumps->id.nnz_loc = mumps->nnz; 2230 mumps->id.irn_loc = mumps->irn; 2231 mumps->id.jcn_loc = mumps->jcn; 2232 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2233 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2234 PetscCall(MatCreateVecs(A, NULL, &b)); 2235 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2236 PetscCall(VecDestroy(&b)); 2237 } 2238 break; 2239 } 2240 PetscMUMPS_c(mumps); 2241 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2242 2243 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2244 F->ops->solve = MatSolve_MUMPS; 2245 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2246 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2247 2248 mumps->matstruc = SAME_NONZERO_PATTERN; 2249 PetscFunctionReturn(PETSC_SUCCESS); 2250 } 2251 2252 /* Note the Petsc r permutation and factor info are ignored */ 2253 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2254 { 2255 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2256 Vec b; 2257 const PetscInt M = A->rmap->N; 2258 2259 PetscFunctionBegin; 2260 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2261 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2262 PetscFunctionReturn(PETSC_SUCCESS); 2263 } 2264 2265 /* Set MUMPS options from the options database */ 2266 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2267 2268 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2269 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2270 2271 /* analysis phase */ 2272 mumps->id.job = JOB_FACTSYMBOLIC; 2273 mumps->id.n = M; 2274 switch (mumps->id.ICNTL(18)) { 2275 case 0: /* centralized assembled matrix input */ 2276 if (!mumps->myid) { 2277 mumps->id.nnz = mumps->nnz; 2278 mumps->id.irn = mumps->irn; 2279 mumps->id.jcn = mumps->jcn; 2280 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2281 } 2282 break; 2283 case 3: /* distributed assembled matrix input (size>1) */ 2284 mumps->id.nnz_loc = mumps->nnz; 2285 mumps->id.irn_loc = mumps->irn; 2286 mumps->id.jcn_loc = mumps->jcn; 2287 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2288 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2289 PetscCall(MatCreateVecs(A, NULL, &b)); 2290 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2291 PetscCall(VecDestroy(&b)); 2292 } 2293 break; 2294 } 2295 PetscMUMPS_c(mumps); 2296 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2297 2298 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2299 F->ops->solve = MatSolve_MUMPS; 2300 F->ops->solvetranspose = MatSolve_MUMPS; 2301 F->ops->matsolve = MatMatSolve_MUMPS; 2302 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2303 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2304 #if defined(PETSC_USE_COMPLEX) 2305 F->ops->getinertia = NULL; 2306 #else 2307 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2308 #endif 2309 2310 mumps->matstruc = SAME_NONZERO_PATTERN; 2311 PetscFunctionReturn(PETSC_SUCCESS); 2312 } 2313 2314 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2315 { 2316 PetscBool iascii; 2317 PetscViewerFormat format; 2318 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2319 2320 PetscFunctionBegin; 2321 /* check if matrix is mumps type */ 2322 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2323 2324 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2325 if (iascii) { 2326 PetscCall(PetscViewerGetFormat(viewer, &format)); 2327 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2328 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2329 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2330 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2331 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2332 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2333 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2334 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2335 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2336 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2337 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2338 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2339 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2340 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2341 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2342 if (mumps->id.ICNTL(11) > 0) { 2343 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2344 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2345 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2346 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2347 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2348 PetscCall(PetscViewerASCIIPrintf(viewer, " 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2349 } 2350 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2351 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2352 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2353 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2354 /* ICNTL(15-17) not used */ 2355 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2356 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2357 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2358 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2359 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2360 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2361 2362 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2363 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2364 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2365 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2366 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2367 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2368 2369 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2370 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2371 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2372 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2373 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2374 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2375 2376 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2377 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2378 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2379 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2380 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2381 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2382 2383 /* information local to 
each processor */ 2384 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2385 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2386 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2387 PetscCall(PetscViewerFlush(viewer)); 2388 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2389 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2390 PetscCall(PetscViewerFlush(viewer)); 2391 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2392 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2393 PetscCall(PetscViewerFlush(viewer)); 2394 2395 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2396 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2397 PetscCall(PetscViewerFlush(viewer)); 2398 2399 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2400 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2401 PetscCall(PetscViewerFlush(viewer)); 2402 2403 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2404 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2405 PetscCall(PetscViewerFlush(viewer)); 2406 2407 if (mumps->ninfo && mumps->ninfo <= 80) { 2408 PetscInt i; 2409 for (i = 0; i < mumps->ninfo; i++) { 2410 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2411 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2412 PetscCall(PetscViewerFlush(viewer)); 2413 } 2414 } 2415 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2416 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2417 2418 if (mumps->myid == 0) { /* information from the host */ 2419 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2420 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2421 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2422 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2423 2424 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2425 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2426 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2427 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2428 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2429 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2430 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2431 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2432 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2433 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2434 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2435 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2436 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2437 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2438 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2439 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2440 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2441 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 
2442 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2443 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2444 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2445 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2446 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2447 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2448 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2449 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2450 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2451 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2452 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2453 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2454 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2455 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2456 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2457 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2458 } 2459 } 2460 } 2461 PetscFunctionReturn(PETSC_SUCCESS); 2462 } 2463 2464 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2465 { 2466 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2467 2468 PetscFunctionBegin; 2469 info->block_size = 1.0; 2470 info->nz_allocated = mumps->id.INFOG(20); 2471 info->nz_used = mumps->id.INFOG(20); 2472 info->nz_unneeded = 0.0; 2473 info->assemblies = 0.0; 2474 info->mallocs = 0.0; 2475 info->memory = 0.0; 2476 info->fill_ratio_given = 0; 2477 info->fill_ratio_needed = 0; 2478 info->factor_mallocs = 0; 2479 PetscFunctionReturn(PETSC_SUCCESS); 2480 } 2481 2482 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2483 { 2484 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2485 const PetscScalar *arr; 2486 const PetscInt *idxs; 2487 
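/* An illustrative user-level sketch (not part of this routine) of how the Schur complement feature is typically driven; ksp, pc, is and S stand for the caller's objects:
       PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
       PetscCall(PCFactorSetUpMatSolverType(pc));
       PetscCall(PCFactorGetMatrix(pc, &F));
       PetscCall(MatFactorSetSchurIS(F, is));
       PetscCall(KSPSetUp(ksp));                                -- factors A and computes the Schur complement
       PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));  -- copies it into a new dense size_schur x size_schur matrix S
*/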
PetscInt size, i; 2488 2489 PetscFunctionBegin; 2490 PetscCall(ISGetLocalSize(is, &size)); 2491 /* Schur complement matrix */ 2492 PetscCall(MatDestroy(&F->schur)); 2493 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2494 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2495 mumps->id.schur = (MumpsScalar *)arr; 2496 mumps->id.size_schur = size; 2497 mumps->id.schur_lld = size; 2498 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2499 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2500 2501 /* MUMPS expects Fortran style indices */ 2502 PetscCall(PetscFree(mumps->id.listvar_schur)); 2503 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2504 PetscCall(ISGetIndices(is, &idxs)); 2505 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2506 PetscCall(ISRestoreIndices(is, &idxs)); 2507 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2508 mumps->id.ICNTL(26) = -1; 2509 PetscFunctionReturn(PETSC_SUCCESS); 2510 } 2511 2512 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2513 { 2514 Mat St; 2515 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2516 PetscScalar *array; 2517 #if defined(PETSC_USE_COMPLEX) 2518 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2519 #endif 2520 2521 PetscFunctionBegin; 2522 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2523 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2524 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2525 PetscCall(MatSetType(St, MATDENSE)); 2526 PetscCall(MatSetUp(St)); 2527 PetscCall(MatDenseGetArray(St, &array)); 2528 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2529 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2530 PetscInt i, j, N = mumps->id.size_schur; 2531 for (i = 0; i < N; i++) { 2532 for (j = 0; j < N; j++) { 2533 #if !defined(PETSC_USE_COMPLEX) 2534 PetscScalar val = mumps->id.schur[i * N + j]; 2535 #else 2536 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2537 #endif 2538 array[j * N + i] = val; 2539 } 2540 } 2541 } else { /* stored by columns */ 2542 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2543 } 2544 } else { /* either full or lower-triangular (not packed) */ 2545 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2546 PetscInt i, j, N = mumps->id.size_schur; 2547 for (i = 0; i < N; i++) { 2548 for (j = i; j < N; j++) { 2549 #if !defined(PETSC_USE_COMPLEX) 2550 PetscScalar val = mumps->id.schur[i * N + j]; 2551 #else 2552 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2553 #endif 2554 array[i * N + j] = val; 2555 array[j * N + i] = val; 2556 } 2557 } 2558 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2559 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2560 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2561 PetscInt i, j, N = mumps->id.size_schur; 2562 for (i = 0; i < N; i++) { 2563 for (j = 0; j < i + 1; j++) { 2564 #if !defined(PETSC_USE_COMPLEX) 2565 PetscScalar val = mumps->id.schur[i * N + j]; 2566 #else 2567 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2568 #endif 2569 array[i * N + j] = val; 
2570 array[j * N + i] = val; 2571 } 2572 } 2573 } 2574 } 2575 PetscCall(MatDenseRestoreArray(St, &array)); 2576 *S = St; 2577 PetscFunctionReturn(PETSC_SUCCESS); 2578 } 2579 2580 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2581 { 2582 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2583 2584 PetscFunctionBegin; 2585 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2586 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2587 for (i = 0; i < nICNTL_pre; ++i) 2588 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2589 if (i == nICNTL_pre) { /* not already cached */ 2590 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2591 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2592 mumps->ICNTL_pre[0]++; 2593 } 2594 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2595 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2596 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2597 PetscFunctionReturn(PETSC_SUCCESS); 2598 } 2599 2600 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2601 { 2602 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2603 2604 PetscFunctionBegin; 2605 if (mumps->id.job == JOB_NULL) { 2606 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2607 *ival = 0; 2608 for (i = 0; i < nICNTL_pre; ++i) { 2609 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2610 } 2611 } else *ival = mumps->id.ICNTL(icntl); 2612 PetscFunctionReturn(PETSC_SUCCESS); 2613 } 2614 2615 /*@ 2616 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2617 2618 Logically Collective 2619 2620 Input Parameters: 2621 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2622 . icntl - index of MUMPS parameter array ICNTL() 2623 - ival - value of MUMPS ICNTL(icntl) 2624 2625 Options Database Key: 2626 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2627 2628 Level: beginner 2629 2630 References: 2631 . * - MUMPS Users' Guide 2632 2633 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2634 @*/ 2635 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2636 { 2637 PetscFunctionBegin; 2638 PetscValidType(F, 1); 2639 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2640 PetscValidLogicalCollectiveInt(F, icntl, 2); 2641 PetscValidLogicalCollectiveInt(F, ival, 3); 2642 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2643 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2644 PetscFunctionReturn(PETSC_SUCCESS); 2645 } 2646 2647 /*@ 2648 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2649 2650 Logically Collective 2651 2652 Input Parameters: 2653 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2654 - icntl - index of MUMPS parameter array ICNTL() 2655 2656 Output Parameter: 2657 . ival - value of MUMPS ICNTL(icntl) 2658 2659 Level: beginner 2660 2661 References: 2662 . 
* - MUMPS Users' Guide 2663 2664 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2665 @*/ 2666 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2667 { 2668 PetscFunctionBegin; 2669 PetscValidType(F, 1); 2670 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2671 PetscValidLogicalCollectiveInt(F, icntl, 2); 2672 PetscValidIntPointer(ival, 3); 2673 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2674 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2675 PetscFunctionReturn(PETSC_SUCCESS); 2676 } 2677 2678 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2679 { 2680 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2681 2682 PetscFunctionBegin; 2683 if (mumps->id.job == JOB_NULL) { 2684 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2685 for (i = 0; i < nCNTL_pre; ++i) 2686 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2687 if (i == nCNTL_pre) { 2688 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2689 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2690 mumps->CNTL_pre[0]++; 2691 } 2692 mumps->CNTL_pre[1 + 2 * i] = icntl; 2693 mumps->CNTL_pre[2 + 2 * i] = val; 2694 } else mumps->id.CNTL(icntl) = val; 2695 PetscFunctionReturn(PETSC_SUCCESS); 2696 } 2697 2698 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2699 { 2700 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2701 2702 PetscFunctionBegin; 2703 if (mumps->id.job == JOB_NULL) { 2704 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2705 *val = 0.0; 2706 for (i = 0; i < nCNTL_pre; ++i) { 2707 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2708 } 2709 } else *val = mumps->id.CNTL(icntl); 2710 PetscFunctionReturn(PETSC_SUCCESS); 2711 } 2712 2713 /*@ 2714 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2715 2716 Logically Collective 2717 2718 Input Parameters: 2719 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2720 . icntl - index of MUMPS parameter array CNTL() 2721 - val - value of MUMPS CNTL(icntl) 2722 2723 Options Database Key: 2724 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2725 2726 Level: beginner 2727 2728 References: 2729 . 
* - MUMPS Users' Guide 2730 2731 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2732 @*/ 2733 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2734 { 2735 PetscFunctionBegin; 2736 PetscValidType(F, 1); 2737 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2738 PetscValidLogicalCollectiveInt(F, icntl, 2); 2739 PetscValidLogicalCollectiveReal(F, val, 3); 2740 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2741 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2742 PetscFunctionReturn(PETSC_SUCCESS); 2743 } 2744 2745 /*@ 2746 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2747 2748 Logically Collective 2749 2750 Input Parameters: 2751 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2752 - icntl - index of MUMPS parameter array CNTL() 2753 2754 Output Parameter: 2755 . val - value of MUMPS CNTL(icntl) 2756 2757 Level: beginner 2758 2759 References: 2760 . * - MUMPS Users' Guide 2761 2762 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2763 @*/ 2764 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2765 { 2766 PetscFunctionBegin; 2767 PetscValidType(F, 1); 2768 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2769 PetscValidLogicalCollectiveInt(F, icntl, 2); 2770 PetscValidRealPointer(val, 3); 2771 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2772 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2773 PetscFunctionReturn(PETSC_SUCCESS); 2774 } 2775 2776 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2777 { 2778 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2779 2780 PetscFunctionBegin; 2781 *info = mumps->id.INFO(icntl); 2782 PetscFunctionReturn(PETSC_SUCCESS); 2783 } 2784 2785 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2786 { 2787 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2788 2789 PetscFunctionBegin; 2790 *infog = mumps->id.INFOG(icntl); 2791 PetscFunctionReturn(PETSC_SUCCESS); 2792 } 2793 2794 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2795 { 2796 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2797 2798 PetscFunctionBegin; 2799 *rinfo = mumps->id.RINFO(icntl); 2800 PetscFunctionReturn(PETSC_SUCCESS); 2801 } 2802 2803 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2804 { 2805 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2806 2807 PetscFunctionBegin; 2808 *rinfog = mumps->id.RINFOG(icntl); 2809 PetscFunctionReturn(PETSC_SUCCESS); 2810 } 2811 2812 PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2813 { 2814 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2815 2816 PetscFunctionBegin; 2817 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2818 *size = 0; 2819 *array = NULL; 2820 if 
(!mumps->myid) { 2821 *size = mumps->id.INFOG(28); 2822 PetscCall(PetscMalloc1(*size, array)); 2823 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2824 } 2825 PetscFunctionReturn(PETSC_SUCCESS); 2826 } 2827 2828 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2829 { 2830 Mat Bt = NULL, Btseq = NULL; 2831 PetscBool flg; 2832 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2833 PetscScalar *aa; 2834 PetscInt spnr, *ia, *ja, M, nrhs; 2835 2836 PetscFunctionBegin; 2837 PetscValidPointer(spRHS, 2); 2838 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2839 if (flg) { 2840 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2841 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2842 2843 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2844 2845 if (mumps->petsc_size > 1) { 2846 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2847 Btseq = b->A; 2848 } else { 2849 Btseq = Bt; 2850 } 2851 2852 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2853 mumps->id.nrhs = nrhs; 2854 mumps->id.lrhs = M; 2855 mumps->id.rhs = NULL; 2856 2857 if (!mumps->myid) { 2858 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2859 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2860 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2861 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2862 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2863 } else { 2864 mumps->id.irhs_ptr = NULL; 2865 mumps->id.irhs_sparse = NULL; 2866 mumps->id.nz_rhs = 0; 2867 mumps->id.rhs_sparse = NULL; 2868 } 2869 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2870 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2871 2872 /* solve phase */ 2873 mumps->id.job = JOB_SOLVE; 2874 PetscMUMPS_c(mumps); 2875 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 2876 2877 if (!mumps->myid) { 2878 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2879 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2880 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2881 } 2882 PetscFunctionReturn(PETSC_SUCCESS); 2883 } 2884 2885 /*@ 2886 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2887 2888 Logically Collective 2889 2890 Input Parameter: 2891 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2892 2893 Output Parameter: 2894 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2895 2896 Level: beginner 2897 2898 References: 2899 . 
* - MUMPS Users' Guide 2900 2901 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2902 @*/ 2903 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2904 { 2905 PetscFunctionBegin; 2906 PetscValidType(F, 1); 2907 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2908 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2909 PetscFunctionReturn(PETSC_SUCCESS); 2910 } 2911 2912 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2913 { 2914 Mat spRHS; 2915 2916 PetscFunctionBegin; 2917 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 2918 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 2919 PetscCall(MatDestroy(&spRHS)); 2920 PetscFunctionReturn(PETSC_SUCCESS); 2921 } 2922 2923 /*@ 2924 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 2925 2926 Logically Collective 2927 2928 Input Parameter: 2929 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2930 2931 Output Parameter: 2932 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 2933 2934 Level: beginner 2935 2936 References: 2937 . * - MUMPS Users' Guide 2938 2939 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2940 @*/ 2941 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 2942 { 2943 PetscBool flg; 2944 2945 PetscFunctionBegin; 2946 PetscValidType(F, 1); 2947 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2948 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 2949 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 2950 2951 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 2952 PetscFunctionReturn(PETSC_SUCCESS); 2953 } 2954 2955 /*@ 2956 MatMumpsGetInfo - Get MUMPS parameter INFO() 2957 2958 Logically Collective 2959 2960 Input Parameters: 2961 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2962 - icntl - index of MUMPS parameter array INFO() 2963 2964 Output Parameter: 2965 . ival - value of MUMPS INFO(icntl) 2966 2967 Level: beginner 2968 2969 References: 2970 . * - MUMPS Users' Guide 2971 2972 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2973 @*/ 2974 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 2975 { 2976 PetscFunctionBegin; 2977 PetscValidType(F, 1); 2978 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2979 PetscValidIntPointer(ival, 3); 2980 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2981 PetscFunctionReturn(PETSC_SUCCESS); 2982 } 2983 2984 /*@ 2985 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2986 2987 Logically Collective 2988 2989 Input Parameters: 2990 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2991 - icntl - index of MUMPS parameter array INFOG() 2992 2993 Output Parameter: 2994 . ival - value of MUMPS INFOG(icntl) 2995 2996 Level: beginner 2997 2998 References: 2999 . 
* - MUMPS Users' Guide 3000 3001 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3002 @*/ 3003 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3004 { 3005 PetscFunctionBegin; 3006 PetscValidType(F, 1); 3007 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3008 PetscValidIntPointer(ival, 3); 3009 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3010 PetscFunctionReturn(PETSC_SUCCESS); 3011 } 3012 3013 /*@ 3014 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3015 3016 Logically Collective 3017 3018 Input Parameters: 3019 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3020 - icntl - index of MUMPS parameter array RINFO() 3021 3022 Output Parameter: 3023 . val - value of MUMPS RINFO(icntl) 3024 3025 Level: beginner 3026 3027 References: 3028 . * - MUMPS Users' Guide 3029 3030 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3031 @*/ 3032 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3033 { 3034 PetscFunctionBegin; 3035 PetscValidType(F, 1); 3036 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3037 PetscValidRealPointer(val, 3); 3038 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3039 PetscFunctionReturn(PETSC_SUCCESS); 3040 } 3041 3042 /*@ 3043 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3044 3045 Logically Collective 3046 3047 Input Parameters: 3048 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3049 - icntl - index of MUMPS parameter array RINFOG() 3050 3051 Output Parameter: 3052 . val - value of MUMPS RINFOG(icntl) 3053 3054 Level: beginner 3055 3056 References: 3057 . * - MUMPS Users' Guide 3058 3059 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3060 @*/ 3061 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3062 { 3063 PetscFunctionBegin; 3064 PetscValidType(F, 1); 3065 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3066 PetscValidRealPointer(val, 3); 3067 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3068 PetscFunctionReturn(PETSC_SUCCESS); 3069 } 3070 3071 /*@ 3072 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3073 3074 Logically Collective 3075 3076 Input Parameter: 3077 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3078 3079 Output Parameters: 3080 + size - local size of the array. The size of the array is non-zero only on the host. 3081 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3082 for freeing this array. 3083 3084 Level: beginner 3085 3086 References: 3087 . 
* - MUMPS Users' Guide 3088 3089 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3090 @*/ 3091 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3092 { 3093 PetscFunctionBegin; 3094 PetscValidType(F, 1); 3095 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3096 PetscValidIntPointer(size, 3); 3097 PetscValidPointer(array, 4); 3098 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3099 PetscFunctionReturn(PETSC_SUCCESS); 3100 } 3101 3102 /*MC 3103 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3104 distributed and sequential matrices via the external package MUMPS. 3105 3106 Works with `MATAIJ` and `MATSBAIJ` matrices 3107 3108 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3109 3110 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3111 See details below. 3112 3113 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3114 3115 Options Database Keys: 3116 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3117 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning messages 3118 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3119 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3120 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 3121 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3122 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3123 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3124 . -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps 3125 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3126 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3127 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3128 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3129 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3130 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3131 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3132 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3133 . -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor 3134 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3135 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3136 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 3137 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 3138 .
-mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3139 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3140 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3141 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3142 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3143 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3144 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3145 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3146 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3147 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3148 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3149 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3150 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3151 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3152 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3153 3154 Level: beginner 3155 3156 Notes: 3157 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc), so using it will 3158 produce an error if the matrix is Hermitian. 3159 3160 When used within a `KSP`/`PC` solve, the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3161 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3162 3163 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3164 the failure with 3165 .vb 3166 KSPGetPC(ksp,&pc); 3167 PCFactorGetMatrix(pc,&mat); 3168 MatMumpsGetInfo(mat,....); 3169 MatMumpsGetInfog(mat,....); etc. 3170 .ve 3171 Or run with `-ksp_error_if_not_converged`, in which case the program stops and the information is printed in the error message. 3172 3173 MUMPS provides 64-bit integer support in two build modes: 3174 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3175 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3176 3177 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3178 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3179 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3180 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3181 3182 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3183 3184 There are two modes to run MUMPS/PETSc with OpenMP: 3185 .vb 3186 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores.
For example, if you want to have 16 OpenMP 3187 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3188 .ve 3189 3190 .vb 3191 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3192 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3193 .ve 3194 3195 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3196 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3197 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3198 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3199 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 3200 3201 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3202 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3203 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3204 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (the assignment might come 3205 from CPU binding policies in job scripts) and make those CPUs available to the master so that OpenMP threads spawned by MUMPS can run on them. 3206 On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets 3207 and you interleave MPI ranks across them (even ranks on socket 0, odd ranks on socket 1) while binding 3208 MPI ranks to cores; then with -mat_mumps_use_omp_threads 16, a master rank (and the threads it spawns) will use half of its cores in socket 0 and half 3209 in socket 1, which hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, the 3210 problem does not occur. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3211 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3212 examine the mapping result. 3213 3214 PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3215 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3216 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3217 3218 References: 3219 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3220 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
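   Example Usage:
   A minimal command-line sketch of selecting this solver and tuning a couple of its parameters; the executable name `myapp` and the option values chosen here are purely illustrative, not defaults.
.vb
   mpirun -n 4 ./myapp -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 5 -mat_mumps_icntl_24 1
.ve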
3221 3222 .seealso: [](chapter_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3223 M*/ 3224 3225 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3226 { 3227 PetscFunctionBegin; 3228 *type = MATSOLVERMUMPS; 3229 PetscFunctionReturn(PETSC_SUCCESS); 3230 } 3231 3232 /* MatGetFactor for Seq and MPI AIJ matrices */ 3233 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3234 { 3235 Mat B; 3236 Mat_MUMPS *mumps; 3237 PetscBool isSeqAIJ; 3238 PetscMPIInt size; 3239 3240 PetscFunctionBegin; 3241 #if defined(PETSC_USE_COMPLEX) 3242 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3243 #endif 3244 /* Create the factorization matrix */ 3245 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3246 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3247 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3248 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3249 PetscCall(MatSetUp(B)); 3250 3251 PetscCall(PetscNew(&mumps)); 3252 3253 B->ops->view = MatView_MUMPS; 3254 B->ops->getinfo = MatGetInfo_MUMPS; 3255 3256 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3257 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3258 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3259 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3260 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3261 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3262 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3263 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3264 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3265 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3266 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3267 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3268 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3269 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3270 3271 if (ftype == MAT_FACTOR_LU) { 3272 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3273 B->factortype = MAT_FACTOR_LU; 3274 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3275 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3276 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3277 mumps->sym = 0; 3278 } else { 3279 B->ops->choleskyfactorsymbolic = 
MatCholeskyFactorSymbolic_MUMPS; 3280 B->factortype = MAT_FACTOR_CHOLESKY; 3281 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3282 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3283 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3284 #if defined(PETSC_USE_COMPLEX) 3285 mumps->sym = 2; 3286 #else 3287 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3288 else mumps->sym = 2; 3289 #endif 3290 } 3291 3292 /* set solvertype */ 3293 PetscCall(PetscFree(B->solvertype)); 3294 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3295 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3296 if (size == 1) { 3297 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3298 B->canuseordering = PETSC_TRUE; 3299 } 3300 B->ops->destroy = MatDestroy_MUMPS; 3301 B->data = (void *)mumps; 3302 3303 *F = B; 3304 mumps->id.job = JOB_NULL; 3305 mumps->ICNTL_pre = NULL; 3306 mumps->CNTL_pre = NULL; 3307 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3308 PetscFunctionReturn(PETSC_SUCCESS); 3309 } 3310 3311 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3312 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3313 { 3314 Mat B; 3315 Mat_MUMPS *mumps; 3316 PetscBool isSeqSBAIJ; 3317 PetscMPIInt size; 3318 3319 PetscFunctionBegin; 3320 #if defined(PETSC_USE_COMPLEX) 3321 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3322 #endif 3323 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3324 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3325 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3326 PetscCall(MatSetUp(B)); 3327 3328 PetscCall(PetscNew(&mumps)); 3329 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3330 if (isSeqSBAIJ) { 3331 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3332 } else { 3333 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3334 } 3335 3336 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3337 B->ops->view = MatView_MUMPS; 3338 B->ops->getinfo = MatGetInfo_MUMPS; 3339 3340 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3341 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3342 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3343 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3344 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3345 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3346 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3347 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3348 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3349 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3350 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", 
MatMumpsGetRinfog_MUMPS)); 3351 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3352 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3353 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3354 3355 B->factortype = MAT_FACTOR_CHOLESKY; 3356 #if defined(PETSC_USE_COMPLEX) 3357 mumps->sym = 2; 3358 #else 3359 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3360 else mumps->sym = 2; 3361 #endif 3362 3363 /* set solvertype */ 3364 PetscCall(PetscFree(B->solvertype)); 3365 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3366 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3367 if (size == 1) { 3368 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3369 B->canuseordering = PETSC_TRUE; 3370 } 3371 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3372 B->ops->destroy = MatDestroy_MUMPS; 3373 B->data = (void *)mumps; 3374 3375 *F = B; 3376 mumps->id.job = JOB_NULL; 3377 mumps->ICNTL_pre = NULL; 3378 mumps->CNTL_pre = NULL; 3379 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3380 PetscFunctionReturn(PETSC_SUCCESS); 3381 } 3382 3383 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3384 { 3385 Mat B; 3386 Mat_MUMPS *mumps; 3387 PetscBool isSeqBAIJ; 3388 PetscMPIInt size; 3389 3390 PetscFunctionBegin; 3391 /* Create the factorization matrix */ 3392 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3393 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3394 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3395 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3396 PetscCall(MatSetUp(B)); 3397 3398 PetscCall(PetscNew(&mumps)); 3399 if (ftype == MAT_FACTOR_LU) { 3400 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3401 B->factortype = MAT_FACTOR_LU; 3402 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3403 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3404 mumps->sym = 0; 3405 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3406 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3407 3408 B->ops->view = MatView_MUMPS; 3409 B->ops->getinfo = MatGetInfo_MUMPS; 3410 3411 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3412 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3413 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3414 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3415 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3416 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3417 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3418 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3419 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3420 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3421 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3422 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3423 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3424 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3425 3426 /* set solvertype */ 3427 PetscCall(PetscFree(B->solvertype)); 3428 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3429 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3430 if (size == 1) { 3431 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3432 B->canuseordering = PETSC_TRUE; 3433 } 3434 B->ops->destroy = MatDestroy_MUMPS; 3435 B->data = (void *)mumps; 3436 3437 *F = B; 3438 mumps->id.job = JOB_NULL; 3439 mumps->ICNTL_pre = NULL; 3440 mumps->CNTL_pre = NULL; 3441 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3442 PetscFunctionReturn(PETSC_SUCCESS); 3443 } 3444 3445 /* MatGetFactor for Seq and MPI SELL matrices */ 3446 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3447 { 3448 Mat B; 3449 Mat_MUMPS *mumps; 3450 PetscBool isSeqSELL; 3451 PetscMPIInt size; 3452 3453 PetscFunctionBegin; 3454 /* Create the factorization matrix */ 3455 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3456 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3457 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3458 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3459 PetscCall(MatSetUp(B)); 3460 3461 PetscCall(PetscNew(&mumps)); 3462 3463 B->ops->view = MatView_MUMPS; 3464 B->ops->getinfo = MatGetInfo_MUMPS; 3465 3466 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3467 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3468 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3469 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3470 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3471 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3472 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3473 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3474 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3475 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3476 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3477 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3478 3479 if (ftype == MAT_FACTOR_LU) { 3480 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3481 B->factortype = 
MAT_FACTOR_LU; 3482 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3483 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3484 mumps->sym = 0; 3485 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3486 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3487 3488 /* set solvertype */ 3489 PetscCall(PetscFree(B->solvertype)); 3490 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3491 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3492 if (size == 1) { 3493 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3494 B->canuseordering = PETSC_TRUE; 3495 } 3496 B->ops->destroy = MatDestroy_MUMPS; 3497 B->data = (void *)mumps; 3498 3499 *F = B; 3500 mumps->id.job = JOB_NULL; 3501 mumps->ICNTL_pre = NULL; 3502 mumps->CNTL_pre = NULL; 3503 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3504 PetscFunctionReturn(PETSC_SUCCESS); 3505 } 3506 3507 /* MatGetFactor for MATNEST matrices */ 3508 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3509 { 3510 Mat B, **mats; 3511 Mat_MUMPS *mumps; 3512 PetscInt nr, nc; 3513 PetscMPIInt size; 3514 3515 PetscFunctionBegin; 3516 #if defined(PETSC_USE_COMPLEX) 3517 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3518 #endif 3519 3520 /* Errors if some condition is not satisfied */ 3521 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3522 if (ftype == MAT_FACTOR_CHOLESKY) { 3523 IS *rows, *cols; 3524 PetscBool flg = PETSC_TRUE; 3525 PetscInt *m, *M; 3526 3527 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3528 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3529 PetscCall(MatNestGetISs(A, rows, cols)); 3530 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3531 if (!flg) PetscCall(PetscFree2(rows, cols)); 3532 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU."); 3533 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3534 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3535 for (PetscInt r = 0; flg && r < nr; r++) 3536 for (PetscInt k = r + 1; flg && k < nr; k++) 3537 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3538 PetscCall(PetscFree2(m, M)); 3539 PetscCall(PetscFree2(rows, cols)); 3540 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. 
Use MAT_FACTOR_LU."); 3541 } 3542 3543 for (PetscInt r = 0; r < nr; r++) { 3544 for (PetscInt c = 0; c < nc; c++) { 3545 Mat sub = mats[r][c]; 3546 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ; 3547 3548 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3549 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3550 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3551 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3552 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3553 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3554 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3555 if (ftype == MAT_FACTOR_CHOLESKY) { 3556 if (r == c) PetscCheck(isSeqAIJ || isMPIAIJ || isSeqSBAIJ || isMPISBAIJ, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for diagonal block of type %s", ((PetscObject)sub)->type_name); 3557 else PetscCheck(isSeqAIJ || isMPIAIJ || isSeqBAIJ || isMPIBAIJ, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for offdiagonal block of type %s", ((PetscObject)sub)->type_name); 3558 } else PetscCheck(isSeqAIJ || isMPIAIJ || isSeqBAIJ || isMPIBAIJ, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 3559 } 3560 } 3561 3562 /* Create the factorization matrix */ 3563 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3564 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3565 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3566 PetscCall(MatSetUp(B)); 3567 3568 PetscCall(PetscNew(&mumps)); 3569 3570 B->ops->view = MatView_MUMPS; 3571 B->ops->getinfo = MatGetInfo_MUMPS; 3572 3573 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3574 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3575 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3576 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3577 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3578 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3579 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3580 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3581 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3582 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3583 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3584 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3585 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3586 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3587 3588 if (ftype == MAT_FACTOR_LU) { 3589 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3590 B->factortype = 
MAT_FACTOR_LU; 3591 mumps->sym = 0; 3592 } else { 3593 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3594 B->factortype = MAT_FACTOR_CHOLESKY; 3595 #if defined(PETSC_USE_COMPLEX) 3596 mumps->sym = 2; 3597 #else 3598 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3599 else mumps->sym = 2; 3600 #endif 3601 } 3602 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3603 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3604 3605 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3606 if (size == 1) { 3607 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3608 B->canuseordering = PETSC_TRUE; 3609 } 3610 3611 /* set solvertype */ 3612 PetscCall(PetscFree(B->solvertype)); 3613 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3614 B->ops->destroy = MatDestroy_MUMPS; 3615 B->data = (void *)mumps; 3616 3617 *F = B; 3618 mumps->id.job = JOB_NULL; 3619 mumps->ICNTL_pre = NULL; 3620 mumps->CNTL_pre = NULL; 3621 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3622 PetscFunctionReturn(PETSC_SUCCESS); 3623 } 3624 3625 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3626 { 3627 PetscFunctionBegin; 3628 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3629 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3630 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3631 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3632 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3633 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3634 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3635 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3636 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3637 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3638 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3639 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3640 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 3641 PetscFunctionReturn(PETSC_SUCCESS); 3642 } 3643
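/*
   Illustrative sketch (not part of the registered interface above) of driving MATSOLVERMUMPS directly
   through the Mat API instead of a KSP/PC and the options database. It assumes the caller supplies an
   assembled MATAIJ matrix A and vectors b, x; the ICNTL values used are examples only.

     Mat           F;
     IS            rperm, cperm;
     MatFactorInfo info;
     PetscInt      infog1;

     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
     MatMumpsSetIcntl(F, 24, 1);                              // e.g. enable null pivot detection, ICNTL(24)=1
     MatGetOrdering(A, MATORDERINGEXTERNAL, &rperm, &cperm);  // let MUMPS compute its own ordering
     MatFactorInfoInitialize(&info);
     MatLUFactorSymbolic(F, A, rperm, cperm, &info);
     MatLUFactorNumeric(F, A, &info);
     MatMumpsGetInfog(F, 1, &infog1);                         // INFOG(1) < 0 indicates a MUMPS error
     MatSolve(F, b, x);
     ISDestroy(&rperm);
     ISDestroy(&cperm);
     MatDestroy(&F);
*/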