1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <petscsf.h> 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 9 #include <../src/mat/impls/sell/mpi/mpisell.h> 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #if defined(PETSC_USE_REAL_SINGLE) 14 #include <cmumps_c.h> 15 #else 16 #include <zmumps_c.h> 17 #endif 18 #else 19 #if defined(PETSC_USE_REAL_SINGLE) 20 #include <smumps_c.h> 21 #else 22 #include <dmumps_c.h> 23 #endif 24 #endif 25 EXTERN_C_END 26 #define JOB_INIT -1 27 #define JOB_NULL 0 28 #define JOB_FACTSYMBOLIC 1 29 #define JOB_FACTNUMERIC 2 30 #define JOB_SOLVE 3 31 #define JOB_END -2 32 33 /* calls to MUMPS */ 34 #if defined(PETSC_USE_COMPLEX) 35 #if defined(PETSC_USE_REAL_SINGLE) 36 #define MUMPS_c cmumps_c 37 #else 38 #define MUMPS_c zmumps_c 39 #endif 40 #else 41 #if defined(PETSC_USE_REAL_SINGLE) 42 #define MUMPS_c smumps_c 43 #else 44 #define MUMPS_c dmumps_c 45 #endif 46 #endif 47 48 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 49 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 50 naming convention in PetscMPIInt, PetscBLASInt etc. 51 */ 52 typedef MUMPS_INT PetscMUMPSInt; 53 54 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 55 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 56 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 57 #endif 58 #else 59 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 60 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 61 #endif 62 #endif 63 64 #define MPIU_MUMPSINT MPI_INT 65 #define PETSC_MUMPS_INT_MAX 2147483647 66 #define PETSC_MUMPS_INT_MIN -2147483648 67 68 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 69 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 70 { 71 PetscFunctionBegin; 72 #if PetscDefined(USE_64BIT_INDICES) 73 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 74 #endif 75 *b = (PetscMUMPSInt)(a); 76 PetscFunctionReturn(PETSC_SUCCESS); 77 } 78 79 /* Put these utility routines here since they are only used in this file */ 80 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 81 { 82 PetscInt myval; 83 PetscBool myset; 84 PetscFunctionBegin; 85 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 86 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 87 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 88 if (set) *set = myset; 89 PetscFunctionReturn(PETSC_SUCCESS); 90 } 91 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 92 93 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. 
Before/after the call, we change/restore CPUs the master ranks can run on */ 94 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 95 #define PetscMUMPS_c(mumps) \ 96 do { \ 97 if (mumps->use_petsc_omp_support) { \ 98 if (mumps->is_omp_master) { \ 99 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 100 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 101 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 102 PetscCall(PetscFPTrapPop()); \ 103 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 104 } \ 105 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 106 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 107 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 108 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 109 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 110 */ \ 111 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 112 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 113 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 114 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 115 } else { \ 116 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 117 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 118 PetscCall(PetscFPTrapPop()); \ 119 } \ 120 } while (0) 121 #else 122 #define PetscMUMPS_c(mumps) \ 123 do { \ 124 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 125 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 126 PetscCall(PetscFPTrapPop()); \ 127 } while (0) 128 #endif 129 130 /* declare MumpsScalar */ 131 #if defined(PETSC_USE_COMPLEX) 132 #if defined(PETSC_USE_REAL_SINGLE) 133 #define MumpsScalar mumps_complex 134 #else 135 #define MumpsScalar mumps_double_complex 136 #endif 137 #else 138 #define MumpsScalar PetscScalar 139 #endif 140 141 /* macros s.t. indices match MUMPS documentation */ 142 #define ICNTL(I) icntl[(I)-1] 143 #define CNTL(I) cntl[(I)-1] 144 #define INFOG(I) infog[(I)-1] 145 #define INFO(I) info[(I)-1] 146 #define RINFOG(I) rinfog[(I)-1] 147 #define RINFO(I) rinfo[(I)-1] 148 149 typedef struct Mat_MUMPS Mat_MUMPS; 150 struct Mat_MUMPS { 151 #if defined(PETSC_USE_COMPLEX) 152 #if defined(PETSC_USE_REAL_SINGLE) 153 CMUMPS_STRUC_C id; 154 #else 155 ZMUMPS_STRUC_C id; 156 #endif 157 #else 158 #if defined(PETSC_USE_REAL_SINGLE) 159 SMUMPS_STRUC_C id; 160 #else 161 DMUMPS_STRUC_C id; 162 #endif 163 #endif 164 165 MatStructure matstruc; 166 PetscMPIInt myid, petsc_size; 167 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 168 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 169 PetscInt64 nnz; /* number of nonzeros. 
The type is called selective 64-bit in mumps */ 170 PetscMUMPSInt sym; 171 MPI_Comm mumps_comm; 172 PetscMUMPSInt *ICNTL_pre; 173 PetscReal *CNTL_pre; 174 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 175 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 176 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 177 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 178 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 179 PetscInt *rhs_nrow, max_nrhs; 180 PetscMPIInt *rhs_recvcounts, *rhs_disps; 181 PetscScalar *rhs_loc, *rhs_recvbuf; 182 #endif 183 Vec b_seq, x_seq; 184 PetscInt ninfo, *info; /* which INFO to display */ 185 PetscInt sizeredrhs; 186 PetscScalar *schur_sol; 187 PetscInt schur_sizesol; 188 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 189 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 190 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 191 192 /* Support for MATNEST */ 193 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 194 PetscInt64 *nest_vals_start; 195 PetscScalar *nest_vals; 196 197 /* stuff used by petsc/mumps OpenMP support*/ 198 PetscBool use_petsc_omp_support; 199 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 200 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 201 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 202 PetscMPIInt tag, omp_comm_size; 203 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 204 MPI_Request *reqs; 205 }; 206 207 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 208 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 209 */ 210 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 211 { 212 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 213 214 PetscFunctionBegin; 215 #if defined(PETSC_USE_64BIT_INDICES) 216 { 217 PetscInt i; 218 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 219 PetscCall(PetscFree(mumps->ia_alloc)); 220 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 221 mumps->cur_ilen = nrow + 1; 222 } 223 if (nnz > mumps->cur_jlen) { 224 PetscCall(PetscFree(mumps->ja_alloc)); 225 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 226 mumps->cur_jlen = nnz; 227 } 228 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 229 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 230 *ia_mumps = mumps->ia_alloc; 231 *ja_mumps = mumps->ja_alloc; 232 } 233 #else 234 *ia_mumps = ia; 235 *ja_mumps = ja; 236 #endif 237 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 238 PetscFunctionReturn(PETSC_SUCCESS); 239 } 240 241 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 242 { 243 PetscFunctionBegin; 244 PetscCall(PetscFree(mumps->id.listvar_schur)); 245 PetscCall(PetscFree(mumps->id.redrhs)); 246 PetscCall(PetscFree(mumps->schur_sol)); 247 mumps->id.size_schur = 0; 248 mumps->id.schur_lld = 0; 249 mumps->id.ICNTL(19) = 0; 250 PetscFunctionReturn(PETSC_SUCCESS); 251 } 252 253 /* solve with rhs in mumps->id.redrhs and return in the same location */ 254 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 255 { 256 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 257 Mat S, B, X; 258 MatFactorSchurStatus schurstatus; 259 PetscInt sizesol; 260 261 PetscFunctionBegin; 262 PetscCall(MatFactorFactorizeSchurComplement(F)); 263 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 264 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 265 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 266 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 267 PetscCall(MatBindToCPU(B, S->boundtocpu)); 268 #endif 269 switch (schurstatus) { 270 case MAT_FACTOR_SCHUR_FACTORED: 271 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 272 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 273 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 274 PetscCall(MatBindToCPU(X, S->boundtocpu)); 275 #endif 276 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 277 PetscCall(MatMatSolveTranspose(S, B, X)); 278 } else { 279 PetscCall(MatMatSolve(S, B, X)); 280 } 281 break; 282 case MAT_FACTOR_SCHUR_INVERTED: 283 sizesol = mumps->id.nrhs * mumps->id.size_schur; 284 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 285 PetscCall(PetscFree(mumps->schur_sol)); 286 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 287 mumps->schur_sizesol = sizesol; 288 } 289 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 290 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 291 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 292 PetscCall(MatBindToCPU(X, S->boundtocpu)); 293 #endif 294 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 295 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 296 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 297 } else { 298 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 299 } 300 PetscCall(MatProductSetFromOptions(X)); 301 PetscCall(MatProductSymbolic(X)); 302 PetscCall(MatProductNumeric(X)); 303 304 
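/* copy the product X = S^{-1} B (or S^{-T} B for a transpose solve) from the schur_sol work buffer back into B, which wraps mumps->id.redrhs, so that the subsequent MUMPS expansion phase reads the Schur solution from redrhs */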
PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 305 break; 306 default: 307 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 308 } 309 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 310 PetscCall(MatDestroy(&B)); 311 PetscCall(MatDestroy(&X)); 312 PetscFunctionReturn(PETSC_SUCCESS); 313 } 314 315 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 316 { 317 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 318 319 PetscFunctionBegin; 320 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 321 PetscFunctionReturn(PETSC_SUCCESS); 322 } 323 if (!expansion) { /* prepare for the condensation step */ 324 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 325 /* allocate MUMPS internal array to store reduced right-hand sides */ 326 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 327 PetscCall(PetscFree(mumps->id.redrhs)); 328 mumps->id.lredrhs = mumps->id.size_schur; 329 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 330 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 331 } 332 mumps->id.ICNTL(26) = 1; /* condensation phase */ 333 } else { /* prepare for the expansion step */ 334 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 335 PetscCall(MatMumpsSolveSchur_Private(F)); 336 mumps->id.ICNTL(26) = 2; /* expansion phase */ 337 PetscMUMPS_c(mumps); 338 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 339 /* restore defaults */ 340 mumps->id.ICNTL(26) = -1; 341 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 342 if (mumps->id.nrhs > 1) { 343 PetscCall(PetscFree(mumps->id.redrhs)); 344 mumps->id.lredrhs = 0; 345 mumps->sizeredrhs = 0; 346 } 347 } 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 /* 352 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 353 354 input: 355 A - matrix in aij,baij or sbaij format 356 shift - 0: C style output triple; 1: Fortran style output triple. 357 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 358 MAT_REUSE_MATRIX: only the values in v array are updated 359 output: 360 nnz - dim of r, c, and v (number of local nonzero entries of A) 361 r, c, v - row and col index, matrix values (matrix triples) 362 363 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 364 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 365 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
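As a small illustrative example (not from the MUMPS manual): a 2x2 sequential AIJ matrix with entries a(0,0)=4, a(1,0)=1, a(1,1)=3 converted with shift=1 (Fortran-style indices) yields nnz=3 and the triples r = {1, 2, 2}, c = {1, 1, 2}, v = {4, 1, 3}.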
366 367 */ 368 369 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 370 { 371 const PetscScalar *av; 372 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 373 PetscInt64 nz, rnz, i, j, k; 374 PetscMUMPSInt *row, *col; 375 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 376 377 PetscFunctionBegin; 378 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 379 mumps->val = (PetscScalar *)av; 380 if (reuse == MAT_INITIAL_MATRIX) { 381 nz = aa->nz; 382 ai = aa->i; 383 aj = aa->j; 384 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 385 for (i = k = 0; i < M; i++) { 386 rnz = ai[i + 1] - ai[i]; 387 ajj = aj + ai[i]; 388 for (j = 0; j < rnz; j++) { 389 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 390 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 391 k++; 392 } 393 } 394 mumps->irn = row; 395 mumps->jcn = col; 396 mumps->nnz = nz; 397 } 398 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 399 PetscFunctionReturn(PETSC_SUCCESS); 400 } 401 402 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 403 { 404 PetscInt64 nz, i, j, k, r; 405 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 406 PetscMUMPSInt *row, *col; 407 408 PetscFunctionBegin; 409 mumps->val = a->val; 410 if (reuse == MAT_INITIAL_MATRIX) { 411 nz = a->sliidx[a->totalslices]; 412 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 413 for (i = k = 0; i < a->totalslices; i++) { 414 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 415 } 416 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 417 mumps->irn = row; 418 mumps->jcn = col; 419 mumps->nnz = nz; 420 } 421 PetscFunctionReturn(PETSC_SUCCESS); 422 } 423 424 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 425 { 426 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 427 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 428 PetscInt64 M, nz, idx = 0, rnz, i, j, k, m; 429 PetscInt bs; 430 PetscMUMPSInt *row, *col; 431 432 PetscFunctionBegin; 433 PetscCall(MatGetBlockSize(A, &bs)); 434 M = A->rmap->N / bs; 435 mumps->val = aa->a; 436 if (reuse == MAT_INITIAL_MATRIX) { 437 ai = aa->i; 438 aj = aa->j; 439 nz = bs2 * aa->nz; 440 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 441 for (i = 0; i < M; i++) { 442 ajj = aj + ai[i]; 443 rnz = ai[i + 1] - ai[i]; 444 for (k = 0; k < rnz; k++) { 445 for (j = 0; j < bs; j++) { 446 for (m = 0; m < bs; m++) { 447 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 448 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 449 idx++; 450 } 451 } 452 } 453 } 454 mumps->irn = row; 455 mumps->jcn = col; 456 mumps->nnz = nz; 457 } 458 PetscFunctionReturn(PETSC_SUCCESS); 459 } 460 461 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 462 { 463 const PetscInt *ai, *aj, *ajj; 464 PetscInt bs; 465 PetscInt64 nz, rnz, i, j, k, m; 466 PetscMUMPSInt *row, *col; 467 PetscScalar *val; 468 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 469 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 470 #if defined(PETSC_USE_COMPLEX) 471 PetscBool isset, hermitian; 472 #endif 473 474 PetscFunctionBegin; 475 #if defined(PETSC_USE_COMPLEX) 476 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 477 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky"); 478 #endif 479 ai
= aa->i; 480 aj = aa->j; 481 PetscCall(MatGetBlockSize(A, &bs)); 482 if (reuse == MAT_INITIAL_MATRIX) { 483 const PetscInt64 alloc_size = aa->nz * bs2; 484 485 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 486 if (bs > 1) { 487 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 488 mumps->val = mumps->val_alloc; 489 } else { 490 mumps->val = aa->a; 491 } 492 mumps->irn = row; 493 mumps->jcn = col; 494 } else { 495 if (bs == 1) mumps->val = aa->a; 496 row = mumps->irn; 497 col = mumps->jcn; 498 } 499 val = mumps->val; 500 501 nz = 0; 502 if (bs > 1) { 503 for (i = 0; i < mbs; i++) { 504 rnz = ai[i + 1] - ai[i]; 505 ajj = aj + ai[i]; 506 for (j = 0; j < rnz; j++) { 507 for (k = 0; k < bs; k++) { 508 for (m = 0; m < bs; m++) { 509 if (ajj[j] > i || k >= m) { 510 if (reuse == MAT_INITIAL_MATRIX) { 511 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 512 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 513 } 514 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 515 } 516 } 517 } 518 } 519 } 520 } else if (reuse == MAT_INITIAL_MATRIX) { 521 for (i = 0; i < mbs; i++) { 522 rnz = ai[i + 1] - ai[i]; 523 ajj = aj + ai[i]; 524 for (j = 0; j < rnz; j++) { 525 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 526 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 527 nz++; 528 } 529 } 530 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 531 } 532 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 533 PetscFunctionReturn(PETSC_SUCCESS); 534 } 535 536 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 537 { 538 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 539 PetscInt64 nz, rnz, i, j; 540 const PetscScalar *av, *v1; 541 PetscScalar *val; 542 PetscMUMPSInt *row, *col; 543 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 544 PetscBool missing; 545 #if defined(PETSC_USE_COMPLEX) 546 PetscBool hermitian, isset; 547 #endif 548 549 PetscFunctionBegin; 550 #if defined(PETSC_USE_COMPLEX) 551 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 552 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 553 #endif 554 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 555 ai = aa->i; 556 aj = aa->j; 557 adiag = aa->diag; 558 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 559 if (reuse == MAT_INITIAL_MATRIX) { 560 /* count nz in the upper triangular part of A */ 561 nz = 0; 562 if (missing) { 563 for (i = 0; i < M; i++) { 564 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 565 for (j = ai[i]; j < ai[i + 1]; j++) { 566 if (aj[j] < i) continue; 567 nz++; 568 } 569 } else { 570 nz += ai[i + 1] - adiag[i]; 571 } 572 } 573 } else { 574 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 575 } 576 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 577 PetscCall(PetscMalloc1(nz, &val)); 578 mumps->nnz = nz; 579 mumps->irn = row; 580 mumps->jcn = col; 581 mumps->val = mumps->val_alloc = val; 582 583 nz = 0; 584 if (missing) { 585 for (i = 0; i < M; i++) { 586 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 587 for (j = ai[i]; j < ai[i + 1]; j++) { 588 if (aj[j] < i) continue; 589 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 590 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 591 val[nz] = av[j]; 592 nz++; 593 } 594 } else { 595 rnz = ai[i + 1] - adiag[i]; 596 ajj = aj + adiag[i]; 597 v1 = av + adiag[i]; 598 for (j = 0; j < rnz; 
j++) { 599 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 600 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 601 val[nz++] = v1[j]; 602 } 603 } 604 } 605 } else { 606 for (i = 0; i < M; i++) { 607 rnz = ai[i + 1] - adiag[i]; 608 ajj = aj + adiag[i]; 609 v1 = av + adiag[i]; 610 for (j = 0; j < rnz; j++) { 611 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 612 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 613 val[nz++] = v1[j]; 614 } 615 } 616 } 617 } else { 618 nz = 0; 619 val = mumps->val; 620 if (missing) { 621 for (i = 0; i < M; i++) { 622 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 623 for (j = ai[i]; j < ai[i + 1]; j++) { 624 if (aj[j] < i) continue; 625 val[nz++] = av[j]; 626 } 627 } else { 628 rnz = ai[i + 1] - adiag[i]; 629 v1 = av + adiag[i]; 630 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 631 } 632 } 633 } else { 634 for (i = 0; i < M; i++) { 635 rnz = ai[i + 1] - adiag[i]; 636 v1 = av + adiag[i]; 637 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 638 } 639 } 640 } 641 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 642 PetscFunctionReturn(PETSC_SUCCESS); 643 } 644 645 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 646 { 647 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 648 PetscInt bs; 649 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 650 PetscMUMPSInt *row, *col; 651 const PetscScalar *av, *bv, *v1, *v2; 652 PetscScalar *val; 653 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 654 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 655 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 656 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 657 #if defined(PETSC_USE_COMPLEX) 658 PetscBool hermitian, isset; 659 #endif 660 661 PetscFunctionBegin; 662 #if defined(PETSC_USE_COMPLEX) 663 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 664 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 665 #endif 666 PetscCall(MatGetBlockSize(A, &bs)); 667 rstart = A->rmap->rstart; 668 ai = aa->i; 669 aj = aa->j; 670 bi = bb->i; 671 bj = bb->j; 672 av = aa->a; 673 bv = bb->a; 674 675 garray = mat->garray; 676 677 if (reuse == MAT_INITIAL_MATRIX) { 678 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 679 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 680 PetscCall(PetscMalloc1(nz, &val)); 681 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 682 mumps->irn = row; 683 mumps->jcn = col; 684 mumps->val = mumps->val_alloc = val; 685 } else { 686 val = mumps->val; 687 } 688 689 jj = 0; 690 irow = rstart; 691 for (i = 0; i < mbs; i++) { 692 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 693 countA = ai[i + 1] - ai[i]; 694 countB = bi[i + 1] - bi[i]; 695 bjj = bj + bi[i]; 696 v1 = av + ai[i] * bs2; 697 v2 = bv + bi[i] * bs2; 698 699 if (bs > 1) { 700 /* A-part */ 701 for (j = 0; j < countA; j++) { 702 for (k = 0; k < bs; k++) { 703 for (m = 0; m < bs; m++) { 704 if (rstart + ajj[j] * bs > irow || k >= m) { 705 if (reuse == MAT_INITIAL_MATRIX) { 706 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 707 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 708 } 709 val[jj++] = v1[j * bs2 + m + k * bs]; 710 } 711 } 712 } 713 } 714 715 /* B-part */ 716 for (j = 0; j < countB; j++) { 717 for (k = 0; k < bs; k++) { 718 for (m = 0; m < bs; m++) { 719 if (reuse == MAT_INITIAL_MATRIX) { 720 PetscCall(PetscMUMPSIntCast(irow + m + shift, 
&row[jj])); 721 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 722 } 723 val[jj++] = v2[j * bs2 + m + k * bs]; 724 } 725 } 726 } 727 } else { 728 /* A-part */ 729 for (j = 0; j < countA; j++) { 730 if (reuse == MAT_INITIAL_MATRIX) { 731 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 732 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 733 } 734 val[jj++] = v1[j]; 735 } 736 737 /* B-part */ 738 for (j = 0; j < countB; j++) { 739 if (reuse == MAT_INITIAL_MATRIX) { 740 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 741 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 742 } 743 val[jj++] = v2[j]; 744 } 745 } 746 irow += bs; 747 } 748 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 749 PetscFunctionReturn(PETSC_SUCCESS); 750 } 751 752 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 753 { 754 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 755 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 756 PetscMUMPSInt *row, *col; 757 const PetscScalar *av, *bv, *v1, *v2; 758 PetscScalar *val; 759 Mat Ad, Ao; 760 Mat_SeqAIJ *aa; 761 Mat_SeqAIJ *bb; 762 763 PetscFunctionBegin; 764 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 765 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 766 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 767 768 aa = (Mat_SeqAIJ *)(Ad)->data; 769 bb = (Mat_SeqAIJ *)(Ao)->data; 770 ai = aa->i; 771 aj = aa->j; 772 bi = bb->i; 773 bj = bb->j; 774 775 rstart = A->rmap->rstart; 776 cstart = A->cmap->rstart; 777 778 if (reuse == MAT_INITIAL_MATRIX) { 779 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 780 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 781 PetscCall(PetscMalloc1(nz, &val)); 782 mumps->nnz = nz; 783 mumps->irn = row; 784 mumps->jcn = col; 785 mumps->val = mumps->val_alloc = val; 786 } else { 787 val = mumps->val; 788 } 789 790 jj = 0; 791 irow = rstart; 792 for (i = 0; i < m; i++) { 793 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 794 countA = ai[i + 1] - ai[i]; 795 countB = bi[i + 1] - bi[i]; 796 bjj = bj + bi[i]; 797 v1 = av + ai[i]; 798 v2 = bv + bi[i]; 799 800 /* A-part */ 801 for (j = 0; j < countA; j++) { 802 if (reuse == MAT_INITIAL_MATRIX) { 803 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 804 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 805 } 806 val[jj++] = v1[j]; 807 } 808 809 /* B-part */ 810 for (j = 0; j < countB; j++) { 811 if (reuse == MAT_INITIAL_MATRIX) { 812 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 813 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 814 } 815 val[jj++] = v2[j]; 816 } 817 irow++; 818 } 819 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 820 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 821 PetscFunctionReturn(PETSC_SUCCESS); 822 } 823 824 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 825 { 826 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 827 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 828 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 829 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 830 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 831 const PetscInt bs2 = mat->bs2; 832 PetscInt bs; 833 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 834 PetscMUMPSInt *row, *col; 835 const PetscScalar *av = aa->a, *bv = bb->a, 
*v1, *v2; 836 PetscScalar *val; 837 838 PetscFunctionBegin; 839 PetscCall(MatGetBlockSize(A, &bs)); 840 if (reuse == MAT_INITIAL_MATRIX) { 841 nz = bs2 * (aa->nz + bb->nz); 842 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 843 PetscCall(PetscMalloc1(nz, &val)); 844 mumps->nnz = nz; 845 mumps->irn = row; 846 mumps->jcn = col; 847 mumps->val = mumps->val_alloc = val; 848 } else { 849 val = mumps->val; 850 } 851 852 jj = 0; 853 irow = rstart; 854 for (i = 0; i < mbs; i++) { 855 countA = ai[i + 1] - ai[i]; 856 countB = bi[i + 1] - bi[i]; 857 ajj = aj + ai[i]; 858 bjj = bj + bi[i]; 859 v1 = av + bs2 * ai[i]; 860 v2 = bv + bs2 * bi[i]; 861 862 idx = 0; 863 /* A-part */ 864 for (k = 0; k < countA; k++) { 865 for (j = 0; j < bs; j++) { 866 for (n = 0; n < bs; n++) { 867 if (reuse == MAT_INITIAL_MATRIX) { 868 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 869 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 870 } 871 val[jj++] = v1[idx++]; 872 } 873 } 874 } 875 876 idx = 0; 877 /* B-part */ 878 for (k = 0; k < countB; k++) { 879 for (j = 0; j < bs; j++) { 880 for (n = 0; n < bs; n++) { 881 if (reuse == MAT_INITIAL_MATRIX) { 882 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 883 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 884 } 885 val[jj++] = v2[idx++]; 886 } 887 } 888 } 889 irow += bs; 890 } 891 PetscFunctionReturn(PETSC_SUCCESS); 892 } 893 894 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 895 { 896 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 897 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 898 PetscMUMPSInt *row, *col; 899 const PetscScalar *av, *bv, *v1, *v2; 900 PetscScalar *val; 901 Mat Ad, Ao; 902 Mat_SeqAIJ *aa; 903 Mat_SeqAIJ *bb; 904 #if defined(PETSC_USE_COMPLEX) 905 PetscBool hermitian, isset; 906 #endif 907 908 PetscFunctionBegin; 909 #if defined(PETSC_USE_COMPLEX) 910 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 911 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 912 #endif 913 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 914 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 915 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 916 917 aa = (Mat_SeqAIJ *)(Ad)->data; 918 bb = (Mat_SeqAIJ *)(Ao)->data; 919 ai = aa->i; 920 aj = aa->j; 921 adiag = aa->diag; 922 bi = bb->i; 923 bj = bb->j; 924 925 rstart = A->rmap->rstart; 926 927 if (reuse == MAT_INITIAL_MATRIX) { 928 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 929 nzb = 0; /* num of upper triangular entries in mat->B */ 930 for (i = 0; i < m; i++) { 931 nza += (ai[i + 1] - adiag[i]); 932 countB = bi[i + 1] - bi[i]; 933 bjj = bj + bi[i]; 934 for (j = 0; j < countB; j++) { 935 if (garray[bjj[j]] > rstart) nzb++; 936 } 937 } 938 939 nz = nza + nzb; /* total nz of upper triangular part of mat */ 940 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 941 PetscCall(PetscMalloc1(nz, &val)); 942 mumps->nnz = nz; 943 mumps->irn = row; 944 mumps->jcn = col; 945 mumps->val = mumps->val_alloc = val; 946 } else { 947 val = mumps->val; 948 } 949 950 jj = 0; 951 irow = rstart; 952 for (i = 0; i < m; i++) { 953 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 954 v1 = av + adiag[i]; 955 countA = ai[i + 1] - adiag[i]; 956 countB = bi[i + 1] - bi[i]; 957 bjj = bj + bi[i]; 958 v2 = bv + bi[i]; 959 960 /* 
A-part */ 961 for (j = 0; j < countA; j++) { 962 if (reuse == MAT_INITIAL_MATRIX) { 963 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 964 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 965 } 966 val[jj++] = v1[j]; 967 } 968 969 /* B-part */ 970 for (j = 0; j < countB; j++) { 971 if (garray[bjj[j]] > rstart) { 972 if (reuse == MAT_INITIAL_MATRIX) { 973 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 974 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 975 } 976 val[jj++] = v2[j]; 977 } 978 } 979 irow++; 980 } 981 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 982 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 983 PetscFunctionReturn(PETSC_SUCCESS); 984 } 985 986 PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 987 { 988 Mat **mats; 989 PetscInt nr, nc; 990 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 991 992 PetscFunctionBegin; 993 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 994 if (reuse == MAT_INITIAL_MATRIX) { 995 PetscMUMPSInt *irns, *jcns; 996 PetscScalar *vals; 997 PetscInt64 totnnz, cumnnz, maxnnz; 998 PetscInt *pjcns_w; 999 IS *rows, *cols; 1000 PetscInt **rows_idx, **cols_idx; 1001 1002 cumnnz = 0; 1003 maxnnz = 0; 1004 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1005 for (PetscInt r = 0; r < nr; r++) { 1006 for (PetscInt c = 0; c < nc; c++) { 1007 Mat sub = mats[r][c]; 1008 1009 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1010 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1011 if (sub) { 1012 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1013 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE; 1014 MatInfo info; 1015 1016 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1017 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1018 else { 1019 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1020 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1021 } 1022 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1023 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1024 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1025 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1026 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1027 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1028 1029 if (chol) { 1030 if (r == c) { 1031 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1032 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1033 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1034 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1035 } else { 1036 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1037 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1038 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1039 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1040 } 1041 } else { 1042 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1043 else if (isMPIAIJ) 
convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1044 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1045 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1046 } 1047 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1048 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1049 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1050 cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 1051 maxnnz = PetscMax(maxnnz, info.nz_used); 1052 } 1053 } 1054 } 1055 1056 /* Allocate total COO */ 1057 totnnz = cumnnz; 1058 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1059 PetscCall(PetscMalloc1(totnnz, &vals)); 1060 1061 /* Handle rows and column maps 1062 We directly map rows and use an SF for the columns */ 1063 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1064 PetscCall(MatNestGetISs(A, rows, cols)); 1065 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1066 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1067 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1068 else (void)maxnnz; 1069 1070 cumnnz = 0; 1071 for (PetscInt r = 0; r < nr; r++) { 1072 for (PetscInt c = 0; c < nc; c++) { 1073 Mat sub = mats[r][c]; 1074 const PetscInt *ridx = rows_idx[r]; 1075 const PetscInt *cidx = cols_idx[c]; 1076 PetscInt rst; 1077 PetscSF csf; 1078 PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 1079 PetscLayout cmap; 1080 1081 mumps->nest_vals_start[r * nc + c] = cumnnz; 1082 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1083 1084 /* Extract inner blocks if needed */ 1085 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1086 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1087 else { 1088 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1089 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1090 } 1091 swap = (PetscBool)(isTrans || isHTrans); 1092 1093 /* Get column layout to map off-process columns */ 1094 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1095 1096 /* Get row start to map on-process rows */ 1097 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1098 1099 /* Directly use the mumps datastructure and use C ordering for now */ 1100 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1101 1102 /* Swap the role of rows and columns indices for transposed blocks 1103 since we need values with global final ordering */ 1104 if (swap) { 1105 cidx = rows_idx[r]; 1106 ridx = cols_idx[c]; 1107 } 1108 1109 /* Communicate column indices 1110 This could have been done with a single SF but it would have complicated the code a lot. 
1111 But since we do it only once, we pay the price of setting up an SF for each block */ 1112 if (PetscDefined(USE_64BIT_INDICES)) { 1113 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1114 } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 1115 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1116 PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1117 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1118 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1119 PetscCall(PetscSFDestroy(&csf)); 1120 1121 /* Import indices: use direct map for rows and mapped indices for columns */ 1122 if (swap) { 1123 for (PetscInt k = 0; k < mumps->nnz; k++) { 1124 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1125 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1126 } 1127 } else { 1128 for (PetscInt k = 0; k < mumps->nnz; k++) { 1129 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1130 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1131 } 1132 } 1133 1134 /* Import values to full COO */ 1135 if (isHTrans) { /* conjugate the entries */ 1136 for (PetscInt k = 0; k < mumps->nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1137 } 1138 PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1139 1140 /* Shift new starting point and sanity check */ 1141 cumnnz += mumps->nnz; 1142 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1143 1144 /* Free scratch memory */ 1145 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1146 PetscCall(PetscFree(mumps->val_alloc)); 1147 mumps->val = NULL; 1148 mumps->nnz = 0; 1149 } 1150 } 1151 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1152 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1153 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1154 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1155 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1156 mumps->nest_vals_start[nr * nc] = cumnnz; 1157 1158 /* Set pointers for final MUMPS data structure */ 1159 mumps->nest_vals = vals; 1160 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1161 mumps->val = vals; 1162 mumps->irn = irns; 1163 mumps->jcn = jcns; 1164 mumps->nnz = cumnnz; 1165 } else { 1166 PetscScalar *oval = mumps->nest_vals; 1167 for (PetscInt r = 0; r < nr; r++) { 1168 for (PetscInt c = 0; c < nc; c++) { 1169 PetscBool isTrans, isHTrans = PETSC_FALSE; 1170 Mat sub = mats[r][c]; 1171 PetscInt midx = r * nc + c; 1172 1173 if (!mumps->nest_convert_to_triples[midx]) continue; 1174 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1175 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1176 else { 1177 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1178 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1179 } 1180 mumps->val = oval + mumps->nest_vals_start[midx]; 1181 
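/* with MAT_REUSE_MATRIX the per-block conversion routine only refreshes the numerical values in this block's slice of the global COO arrays (entries nest_vals_start[midx] up to nest_vals_start[midx + 1]); the row/column indices assembled during MAT_INITIAL_MATRIX are reused unchanged */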
PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1182 if (isHTrans) { 1183 PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1184 for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1185 } 1186 } 1187 } 1188 mumps->val = oval; 1189 } 1190 PetscFunctionReturn(PETSC_SUCCESS); 1191 } 1192 1193 PetscErrorCode MatDestroy_MUMPS(Mat A) 1194 { 1195 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1196 1197 PetscFunctionBegin; 1198 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1199 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1200 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1201 PetscCall(VecDestroy(&mumps->b_seq)); 1202 PetscCall(VecDestroy(&mumps->x_seq)); 1203 PetscCall(PetscFree(mumps->id.perm_in)); 1204 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1205 PetscCall(PetscFree(mumps->val_alloc)); 1206 PetscCall(PetscFree(mumps->info)); 1207 PetscCall(PetscFree(mumps->ICNTL_pre)); 1208 PetscCall(PetscFree(mumps->CNTL_pre)); 1209 PetscCall(MatMumpsResetSchur_Private(mumps)); 1210 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1211 mumps->id.job = JOB_END; 1212 PetscMUMPS_c(mumps); 1213 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1214 if (mumps->mumps_comm != MPI_COMM_NULL) { 1215 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1216 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1217 } 1218 } 1219 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1220 if (mumps->use_petsc_omp_support) { 1221 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1222 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1223 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1224 } 1225 #endif 1226 PetscCall(PetscFree(mumps->ia_alloc)); 1227 PetscCall(PetscFree(mumps->ja_alloc)); 1228 PetscCall(PetscFree(mumps->recvcount)); 1229 PetscCall(PetscFree(mumps->reqs)); 1230 PetscCall(PetscFree(mumps->irhs_loc)); 1231 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1232 PetscCall(PetscFree(mumps->nest_vals)); 1233 PetscCall(PetscFree(A->data)); 1234 1235 /* clear composed functions */ 1236 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1237 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1238 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1239 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1240 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1241 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1242 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1243 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1244 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1245 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1246 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1247 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1248 
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1249 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1250 PetscFunctionReturn(PETSC_SUCCESS); 1251 } 1252 1253 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1254 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1255 { 1256 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1257 const PetscMPIInt ompsize = mumps->omp_comm_size; 1258 PetscInt i, m, M, rstart; 1259 1260 PetscFunctionBegin; 1261 PetscCall(MatGetSize(A, &M, NULL)); 1262 PetscCall(MatGetLocalSize(A, &m, NULL)); 1263 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1264 if (ompsize == 1) { 1265 if (!mumps->irhs_loc) { 1266 mumps->nloc_rhs = m; 1267 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1268 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1269 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1270 } 1271 mumps->id.rhs_loc = (MumpsScalar *)array; 1272 } else { 1273 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1274 const PetscInt *ranges; 1275 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1276 MPI_Group petsc_group, omp_group; 1277 PetscScalar *recvbuf = NULL; 1278 1279 if (mumps->is_omp_master) { 1280 /* Lazily initialize the omp stuff for distributed rhs */ 1281 if (!mumps->irhs_loc) { 1282 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1283 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1284 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1285 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1286 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1287 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1288 1289 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1290 mumps->nloc_rhs = 0; 1291 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1292 for (j = 0; j < ompsize; j++) { 1293 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1294 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1295 } 1296 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1297 for (j = k = 0; j < ompsize; j++) { 1298 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1299 } 1300 1301 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1302 PetscCallMPI(MPI_Group_free(&petsc_group)); 1303 PetscCallMPI(MPI_Group_free(&omp_group)); 1304 } 1305 1306 /* Realloc buffers when current nrhs is bigger than what we have met */ 1307 if (nrhs > mumps->max_nrhs) { 1308 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1309 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1310 mumps->max_nrhs = nrhs; 1311 } 1312 1313 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1314 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1315 mumps->rhs_disps[0] = 0; 1316 for (j = 1; j < ompsize; j++) { 1317 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1318 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1319 } 1320 recvbuf = 
(nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1321 } 1322 1323 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1324 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1325 1326 if (mumps->is_omp_master) { 1327 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1328 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1329 for (j = 0; j < ompsize; j++) { 1330 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1331 dst = dstbase; 1332 for (i = 0; i < nrhs; i++) { 1333 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1334 src += mumps->rhs_nrow[j]; 1335 dst += mumps->nloc_rhs; 1336 } 1337 dstbase += mumps->rhs_nrow[j]; 1338 } 1339 } 1340 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1341 } 1342 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1343 } 1344 mumps->id.nrhs = nrhs; 1345 mumps->id.nloc_rhs = mumps->nloc_rhs; 1346 mumps->id.lrhs_loc = mumps->nloc_rhs; 1347 mumps->id.irhs_loc = mumps->irhs_loc; 1348 PetscFunctionReturn(PETSC_SUCCESS); 1349 } 1350 1351 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1352 { 1353 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1354 const PetscScalar *rarray = NULL; 1355 PetscScalar *array; 1356 IS is_iden, is_petsc; 1357 PetscInt i; 1358 PetscBool second_solve = PETSC_FALSE; 1359 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1360 1361 PetscFunctionBegin; 1362 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1363 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1364 &cite1)); 1365 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1366 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1367 &cite2)); 1368 1369 if (A->factorerrortype) { 1370 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1371 PetscCall(VecSetInf(x)); 1372 PetscFunctionReturn(PETSC_SUCCESS); 1373 } 1374 1375 mumps->id.nrhs = 1; 1376 if (mumps->petsc_size > 1) { 1377 if (mumps->ICNTL20 == 10) { 1378 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1379 PetscCall(VecGetArrayRead(b, &rarray)); 1380 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1381 } else { 1382 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1383 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1384 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1385 if (!mumps->myid) { 1386 PetscCall(VecGetArray(mumps->b_seq, &array)); 1387 mumps->id.rhs = (MumpsScalar *)array; 1388 } 1389 } 1390 } else { /* petsc_size == 1 */ 1391 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1392 PetscCall(VecCopy(b, x)); 1393 PetscCall(VecGetArray(x, &array)); 1394 mumps->id.rhs = (MumpsScalar *)array; 1395 } 1396 1397 /* 1398 handle condensation step of Schur complement (if any) 1399 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1400 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1401 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1402 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1403 */ 1404 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1405 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1406 second_solve = PETSC_TRUE; 1407 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1408 } 1409 /* solve phase */ 1410 mumps->id.job = JOB_SOLVE; 1411 PetscMUMPS_c(mumps); 1412 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1413 1414 /* handle expansion step of Schur complement (if any) */ 1415 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1416 1417 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1418 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1419 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1420 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1421 } 1422 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1423 PetscInt *isol2_loc = NULL; 1424 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1425 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1426 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1427 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1428 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1429 PetscCall(ISDestroy(&is_iden)); 1430 
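/* the scatter retains everything it needs from the index sets, so both can be destroyed right after its creation */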
PetscCall(ISDestroy(&is_petsc)); 1431 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1432 } 1433 1434 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1435 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1436 } 1437 1438 if (mumps->petsc_size > 1) { 1439 if (mumps->ICNTL20 == 10) { 1440 PetscCall(VecRestoreArrayRead(b, &rarray)); 1441 } else if (!mumps->myid) { 1442 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1443 } 1444 } else PetscCall(VecRestoreArray(x, &array)); 1445 1446 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1447 PetscFunctionReturn(PETSC_SUCCESS); 1448 } 1449 1450 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1451 { 1452 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1453 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1454 1455 PetscFunctionBegin; 1456 mumps->id.ICNTL(9) = 0; 1457 PetscCall(MatSolve_MUMPS(A, b, x)); 1458 mumps->id.ICNTL(9) = value; 1459 PetscFunctionReturn(PETSC_SUCCESS); 1460 } 1461 1462 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1463 { 1464 Mat Bt = NULL; 1465 PetscBool denseX, denseB, flg, flgT; 1466 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1467 PetscInt i, nrhs, M; 1468 PetscScalar *array; 1469 const PetscScalar *rbray; 1470 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1471 PetscMUMPSInt *isol_loc, *isol_loc_save; 1472 PetscScalar *bray, *sol_loc, *sol_loc_save; 1473 IS is_to, is_from; 1474 PetscInt k, proc, j, m, myrstart; 1475 const PetscInt *rstart; 1476 Vec v_mpi, msol_loc; 1477 VecScatter scat_sol; 1478 Vec b_seq; 1479 VecScatter scat_rhs; 1480 PetscScalar *aa; 1481 PetscInt spnr, *ia, *ja; 1482 Mat_MPIAIJ *b = NULL; 1483 1484 PetscFunctionBegin; 1485 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1486 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1487 1488 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1489 if (denseB) { 1490 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1491 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1492 } else { /* sparse B */ 1493 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1494 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1495 if (flgT) { /* input B is transpose of actual RHS matrix, 1496 because mumps requires sparse compressed COLUMN storage! 
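MUMPS takes a sparse RHS in compressed sparse column form through irhs_ptr/irhs_sparse, and the CSR arrays of the stored matrix (obtained with MatGetRowIJ() below) are exactly the CSC arrays of the right-hand side it represents, so no explicit conversion is needed.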
See MatMatTransposeSolve_MUMPS() */ 1497 PetscCall(MatTransposeGetMat(B, &Bt)); 1498 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1499 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1500 } 1501 1502 PetscCall(MatGetSize(B, &M, &nrhs)); 1503 mumps->id.nrhs = nrhs; 1504 mumps->id.lrhs = M; 1505 mumps->id.rhs = NULL; 1506 1507 if (mumps->petsc_size == 1) { 1508 PetscScalar *aa; 1509 PetscInt spnr, *ia, *ja; 1510 PetscBool second_solve = PETSC_FALSE; 1511 1512 PetscCall(MatDenseGetArray(X, &array)); 1513 mumps->id.rhs = (MumpsScalar *)array; 1514 1515 if (denseB) { 1516 /* copy B to X */ 1517 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1518 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1519 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1520 } else { /* sparse B */ 1521 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1522 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1523 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1524 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1525 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1526 } 1527 /* handle condensation step of Schur complement (if any) */ 1528 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1529 second_solve = PETSC_TRUE; 1530 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1531 } 1532 /* solve phase */ 1533 mumps->id.job = JOB_SOLVE; 1534 PetscMUMPS_c(mumps); 1535 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1536 1537 /* handle expansion step of Schur complement (if any) */ 1538 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1539 if (!denseB) { /* sparse B */ 1540 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1541 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1542 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1543 } 1544 PetscCall(MatDenseRestoreArray(X, &array)); 1545 PetscFunctionReturn(PETSC_SUCCESS); 1546 } 1547 1548 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
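For a dense B this restriction is lifted when ICNTL(20)=10 (distributed dense RHS) is selected below; a sparse B is always centralized on the host.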
*/ 1549 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1550 1551 /* create msol_loc to hold mumps local solution */ 1552 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1553 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1554 1555 lsol_loc = mumps->id.lsol_loc; 1556 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1557 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1558 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1559 mumps->id.isol_loc = isol_loc; 1560 1561 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1562 1563 if (denseB) { 1564 if (mumps->ICNTL20 == 10) { 1565 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1566 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1567 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1568 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1569 PetscCall(MatGetLocalSize(B, &m, NULL)); 1570 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1571 } else { 1572 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1573 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1574 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1575 0, re-arrange B into desired order, which is a local operation. 1576 */ 1577 1578 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1579 /* wrap dense rhs matrix B into a vector v_mpi */ 1580 PetscCall(MatGetLocalSize(B, &m, NULL)); 1581 PetscCall(MatDenseGetArray(B, &bray)); 1582 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1583 PetscCall(MatDenseRestoreArray(B, &bray)); 1584 1585 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
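The index map idx built below sends entry (i,j) of B to position j*M + i of b_seq, i.e. the centralized rhs is stored column by column, which is the layout MUMPS expects in id.rhs.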
*/ 1586 if (!mumps->myid) { 1587 PetscInt *idx; 1588 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1589 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1590 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1591 k = 0; 1592 for (proc = 0; proc < mumps->petsc_size; proc++) { 1593 for (j = 0; j < nrhs; j++) { 1594 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1595 } 1596 } 1597 1598 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1599 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1600 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1601 } else { 1602 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1603 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1604 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1605 } 1606 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1607 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1608 PetscCall(ISDestroy(&is_to)); 1609 PetscCall(ISDestroy(&is_from)); 1610 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1611 1612 if (!mumps->myid) { /* define rhs on the host */ 1613 PetscCall(VecGetArray(b_seq, &bray)); 1614 mumps->id.rhs = (MumpsScalar *)bray; 1615 PetscCall(VecRestoreArray(b_seq, &bray)); 1616 } 1617 } 1618 } else { /* sparse B */ 1619 b = (Mat_MPIAIJ *)Bt->data; 1620 1621 /* wrap dense X into a vector v_mpi */ 1622 PetscCall(MatGetLocalSize(X, &m, NULL)); 1623 PetscCall(MatDenseGetArray(X, &bray)); 1624 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1625 PetscCall(MatDenseRestoreArray(X, &bray)); 1626 1627 if (!mumps->myid) { 1628 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1629 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1630 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1631 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1632 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1633 } else { 1634 mumps->id.irhs_ptr = NULL; 1635 mumps->id.irhs_sparse = NULL; 1636 mumps->id.nz_rhs = 0; 1637 mumps->id.rhs_sparse = NULL; 1638 } 1639 } 1640 1641 /* solve phase */ 1642 mumps->id.job = JOB_SOLVE; 1643 PetscMUMPS_c(mumps); 1644 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1645 1646 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1647 PetscCall(MatDenseGetArray(X, &array)); 1648 PetscCall(VecPlaceArray(v_mpi, array)); 1649 1650 /* create scatter scat_sol */ 1651 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1652 /* iidx: index for scatter mumps solution to petsc X */ 1653 1654 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1655 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1656 for (i = 0; i < lsol_loc; i++) { 1657 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1658 1659 for (proc = 0; proc < mumps->petsc_size; proc++) { 1660 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1661 myrstart = rstart[proc]; 1662 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1663 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1664 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1665 break; 1666 } 1667 } 1668 1669 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1670 } 1671 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1672 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1673 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1674 PetscCall(ISDestroy(&is_from)); 1675 PetscCall(ISDestroy(&is_to)); 1676 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1677 PetscCall(MatDenseRestoreArray(X, &array)); 1678 1679 /* free spaces */ 1680 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1681 mumps->id.isol_loc = isol_loc_save; 1682 1683 PetscCall(PetscFree2(sol_loc, isol_loc)); 1684 PetscCall(PetscFree(idxx)); 1685 PetscCall(VecDestroy(&msol_loc)); 1686 PetscCall(VecDestroy(&v_mpi)); 1687 if (!denseB) { 1688 if (!mumps->myid) { 1689 b = (Mat_MPIAIJ *)Bt->data; 1690 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1691 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1692 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1693 } 1694 } else { 1695 if (mumps->ICNTL20 == 0) { 1696 PetscCall(VecDestroy(&b_seq)); 1697 PetscCall(VecScatterDestroy(&scat_rhs)); 1698 } 1699 } 1700 PetscCall(VecScatterDestroy(&scat_sol)); 1701 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1702 PetscFunctionReturn(PETSC_SUCCESS); 1703 } 1704 1705 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1706 { 1707 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1708 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1709 1710 PetscFunctionBegin; 1711 mumps->id.ICNTL(9) = 0; 1712 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1713 mumps->id.ICNTL(9) = value; 1714 PetscFunctionReturn(PETSC_SUCCESS); 1715 } 1716 1717 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1718 { 1719 PetscBool flg; 1720 Mat B; 1721 1722 PetscFunctionBegin; 1723 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1724 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1725 1726 /* Create B=Bt^T that uses Bt's data structure */ 1727 PetscCall(MatCreateTranspose(Bt, &B)); 1728 1729 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1730 PetscCall(MatDestroy(&B)); 1731 PetscFunctionReturn(PETSC_SUCCESS); 1732 } 1733 1734 #if !defined(PETSC_USE_COMPLEX) 1735 /* 1736 input: 1737 F: numeric factor 1738 output: 1739 nneg: total number of negative pivots 1740 nzero: total number of zero pivots 1741 npos: (global dimension of F) - nneg - nzero 1742 */ 1743 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1744 { 1745 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1746 PetscMPIInt size; 1747 1748 PetscFunctionBegin; 1749 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1750 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1751 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1752
1753   if (nneg) *nneg = mumps->id.INFOG(12);
1754   if (nzero || npos) {
1755     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1756     if (nzero) *nzero = mumps->id.INFOG(28);
1757     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1758   }
1759   PetscFunctionReturn(PETSC_SUCCESS);
1760 }
1761 #endif
1762
1763 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1764 {
1765   PetscInt       i, nreqs;
1766   PetscMUMPSInt *irn, *jcn;
1767   PetscMPIInt    count;
1768   PetscInt64     totnnz, remain;
1769   const PetscInt osize = mumps->omp_comm_size;
1770   PetscScalar   *val;
1771
1772   PetscFunctionBegin;
1773   if (osize > 1) {
1774     if (reuse == MAT_INITIAL_MATRIX) {
1775       /* master first gathers counts of nonzeros to receive */
1776       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1777       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1778
1779       /* Then each rank computes the number of sends/recvs it needs */
1780       if (mumps->is_omp_master) {
1781         /* Start from 1 since self communication is not done through MPI */
1782         nreqs = 0;
1783         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1784       } else {
1785         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1786       }
1787       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1788
1789       /* The following code is doing a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1790          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1791          might be a prime number > 2^31-1, we have to slice the messages. Note omp_comm_size
1792          is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
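         For example, a worker rank holding about 5e9 nonzeros posts three batches of Isends below, of sizes 2^31-1, 2^31-1 and the remainder, all with the same tag; the master posts matching Irecvs in the same order, and MPI's non-overtaking rule pairs them up correctly.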
1793 */ 1794 nreqs = 0; /* counter for actual send/recvs */ 1795 if (mumps->is_omp_master) { 1796 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1797 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1798 PetscCall(PetscMalloc1(totnnz, &val)); 1799 1800 /* Self communication */ 1801 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1802 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1803 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1804 1805 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1806 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1807 PetscCall(PetscFree(mumps->val_alloc)); 1808 mumps->nnz = totnnz; 1809 mumps->irn = irn; 1810 mumps->jcn = jcn; 1811 mumps->val = mumps->val_alloc = val; 1812 1813 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1814 jcn += mumps->recvcount[0]; 1815 val += mumps->recvcount[0]; 1816 1817 /* Remote communication */ 1818 for (i = 1; i < osize; i++) { 1819 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1820 remain = mumps->recvcount[i] - count; 1821 while (count > 0) { 1822 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1823 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1824 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1825 irn += count; 1826 jcn += count; 1827 val += count; 1828 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1829 remain -= count; 1830 } 1831 } 1832 } else { 1833 irn = mumps->irn; 1834 jcn = mumps->jcn; 1835 val = mumps->val; 1836 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1837 remain = mumps->nnz - count; 1838 while (count > 0) { 1839 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1840 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1841 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1842 irn += count; 1843 jcn += count; 1844 val += count; 1845 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1846 remain -= count; 1847 } 1848 } 1849 } else { 1850 nreqs = 0; 1851 if (mumps->is_omp_master) { 1852 val = mumps->val + mumps->recvcount[0]; 1853 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1854 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1855 remain = mumps->recvcount[i] - count; 1856 while (count > 0) { 1857 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1858 val += count; 1859 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1860 remain -= count; 1861 } 1862 } 1863 } else { 1864 val = mumps->val; 1865 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1866 remain = mumps->nnz - count; 1867 while (count > 0) { 1868 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1869 val += count; 1870 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1871 remain -= count; 1872 } 1873 } 1874 } 1875 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1876 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1877 } 1878 PetscFunctionReturn(PETSC_SUCCESS); 1879 } 1880 1881 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const 
MatFactorInfo *info)
1882 {
1883   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1884   PetscBool  isMPIAIJ;
1885
1886   PetscFunctionBegin;
1887   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1)==-16 with INFO(2)==0 just means an empty matrix */
1888     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1889     PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1890     PetscFunctionReturn(PETSC_SUCCESS);
1891   }
1892
1893   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1894   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1895
1896   /* numerical factorization phase */
1897   mumps->id.job = JOB_FACTNUMERIC;
1898   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1899     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1900   } else {
1901     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1902   }
1903   PetscMUMPS_c(mumps);
1904   if (mumps->id.INFOG(1) < 0) {
1905     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1906     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1907       PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1908       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1909     } else if (mumps->id.INFOG(1) == -13) {
1910       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1911       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1912     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1913       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1914       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1915     } else {
1916       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1917       F->factorerrortype = MAT_FACTOR_OTHER;
1918     }
1919   }
1920   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " mumps->id.ICNTL(16):=%d", mumps->id.ICNTL(16));
1921
1922   F->assembled = PETSC_TRUE;
1923
1924   if (F->schur) { /* reset Schur status to unfactored */
1925 #if defined(PETSC_HAVE_CUDA)
1926     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1927 #endif
1928     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1929       mumps->id.ICNTL(19) = 2;
1930       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1931     }
1932     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1933   }
1934
1935   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl() is always consistent */
1936   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1937
1938   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1939   if (mumps->petsc_size > 1) {
1940     PetscInt     lsol_loc;
1941     PetscScalar *sol_loc;
1942
1943     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1944
1945     /* distributed solution; Create x_seq=sol_loc for repeated use */
1946     if (mumps->x_seq) {
1947
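      /* a previous numeric factorization already created the solution scatter and work vector; the new factorization may change lsol_loc/isol_loc, so free them here and rebuild below */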
PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1948 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1949 PetscCall(VecDestroy(&mumps->x_seq)); 1950 } 1951 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1952 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1953 mumps->id.lsol_loc = lsol_loc; 1954 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1955 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1956 } 1957 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1958 PetscFunctionReturn(PETSC_SUCCESS); 1959 } 1960 1961 /* Sets MUMPS options from the options database */ 1962 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1963 { 1964 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1965 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1966 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1967 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1968 MumpsScalar *arr; 1969 1970 PetscFunctionBegin; 1971 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1972 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1973 PetscInt nthreads = 0; 1974 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1975 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1976 1977 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1978 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1979 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1980 1981 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1982 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1983 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1984 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1985 if (mumps->use_petsc_omp_support) { 1986 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 1987 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1988 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1989 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1990 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1991 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1992 #endif 1993 } else { 1994 mumps->omp_comm = PETSC_COMM_SELF; 1995 mumps->mumps_comm = mumps->petsc_comm; 1996 mumps->is_omp_master = PETSC_TRUE; 1997 } 1998 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1999 mumps->reqs = NULL; 2000 mumps->tag = 0; 2001 2002 if (mumps->mumps_comm != MPI_COMM_NULL) { 2003 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2004 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
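In the non-OpenMP branch below, PetscCommGetComm() already hands out an inner communicator managed by PETSc, so no extra dup is needed there.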
*/ 2005 MPI_Comm comm; 2006 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2007 mumps->mumps_comm = comm; 2008 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2009 } 2010 2011 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2012 mumps->id.job = JOB_INIT; 2013 mumps->id.par = 1; /* host participates factorizaton and solve */ 2014 mumps->id.sym = mumps->sym; 2015 2016 size = mumps->id.size_schur; 2017 arr = mumps->id.schur; 2018 listvar_schur = mumps->id.listvar_schur; 2019 PetscMUMPS_c(mumps); 2020 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 2021 /* restore cached ICNTL and CNTL values */ 2022 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2023 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2024 PetscCall(PetscFree(mumps->ICNTL_pre)); 2025 PetscCall(PetscFree(mumps->CNTL_pre)); 2026 2027 if (schur) { 2028 mumps->id.size_schur = size; 2029 mumps->id.schur_lld = size; 2030 mumps->id.schur = arr; 2031 mumps->id.listvar_schur = listvar_schur; 2032 if (mumps->petsc_size > 1) { 2033 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2034 2035 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2036 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2037 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2038 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2039 } else { 2040 if (F->factortype == MAT_FACTOR_LU) { 2041 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2042 } else { 2043 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2044 } 2045 } 2046 mumps->id.ICNTL(26) = -1; 2047 } 2048 2049 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2050 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
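The broadcasts below therefore copy the first 40 icntl[] entries and 15 cntl[] entries, not just the ones PETSc overrides.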
2051 */ 2052 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2053 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2054 2055 mumps->scat_rhs = NULL; 2056 mumps->scat_sol = NULL; 2057 2058 /* set PETSc-MUMPS default options - override MUMPS default */ 2059 mumps->id.ICNTL(3) = 0; 2060 mumps->id.ICNTL(4) = 0; 2061 if (mumps->petsc_size == 1) { 2062 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2063 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2064 } else { 2065 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2066 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2067 } 2068 } 2069 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2070 if (flg) mumps->id.ICNTL(1) = icntl; 2071 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2072 if (flg) mumps->id.ICNTL(2) = icntl; 2073 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2074 if (flg) mumps->id.ICNTL(3) = icntl; 2075 2076 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2077 if (flg) mumps->id.ICNTL(4) = icntl; 2078 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2079 2080 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2081 if (flg) mumps->id.ICNTL(6) = icntl; 2082 2083 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2084 if (flg) { 2085 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2086 mumps->id.ICNTL(7) = icntl; 2087 } 2088 2089 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2090 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2091 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2092 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2093 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2094 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2095 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2096 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2097 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2098 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2099 if (flg) { 2100 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2101 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2102 } 2103 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2104 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2105 PetscCall(MatDestroy(&F->schur)); 2106 PetscCall(MatMumpsResetSchur_Private(mumps)); 2107 } 2108 2109 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2110 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2111 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2112 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2113 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2114 In short, we could not use distributed RHS with MPICH until v4.0b1. 
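Either behaviour can still be selected at run time, e.g. -mat_mumps_icntl_20 0 (centralized dense RHS) or -mat_mumps_icntl_20 10 (distributed dense RHS); values other than 0 and 10 are rejected below.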
2115 */
2116 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
2117   mumps->ICNTL20 = 0; /* Centralized dense RHS */
2118 #else
2119   mumps->ICNTL20 = 10; /* Distributed dense RHS */
2120 #endif
2121   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give MUMPS centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2122   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2123 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2124   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2125 #endif
2126   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vectors */
2127
2128   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2129   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2130   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2131   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
2132
2133   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2134   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase when a Schur complement matrix is present", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2135   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2136   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2137   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2138   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2139   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2140   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */
2141   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None",
mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2142   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2143   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2144   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2145   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2146
2147   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
2148   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
2149   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
2150   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
2151   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
2152   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
2153
2154   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2155
2156   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2157   if (ninfo) {
2158     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
2159     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2160     mumps->ninfo = ninfo;
2161     for (i = 0; i < ninfo; i++) {
2162       PetscCheck(info[i] > 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
2163       mumps->info[i] = info[i];
2164     }
2165   }
2166   PetscOptionsEnd();
2167   PetscFunctionReturn(PETSC_SUCCESS);
2168 }
2169
2170 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
2171 {
2172   PetscFunctionBegin;
2173   if (mumps->id.INFOG(1) < 0) {
2174     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
2175     if (mumps->id.INFOG(1) == -6) {
2176       PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2177       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2178     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2179       PetscCall(PetscInfo(F, "problem with workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2180       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2181     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFO(2) = N = 0: empty matrix */
2182       PetscCall(PetscInfo(F, "Empty matrix\n"));
2183     } else {
2184       PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2185 F->factorerrortype = MAT_FACTOR_OTHER; 2186 } 2187 } 2188 PetscFunctionReturn(PETSC_SUCCESS); 2189 } 2190 2191 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2192 { 2193 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2194 Vec b; 2195 const PetscInt M = A->rmap->N; 2196 2197 PetscFunctionBegin; 2198 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2199 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2200 PetscFunctionReturn(PETSC_SUCCESS); 2201 } 2202 2203 /* Set MUMPS options from the options database */ 2204 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2205 2206 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2207 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2208 2209 /* analysis phase */ 2210 mumps->id.job = JOB_FACTSYMBOLIC; 2211 mumps->id.n = M; 2212 switch (mumps->id.ICNTL(18)) { 2213 case 0: /* centralized assembled matrix input */ 2214 if (!mumps->myid) { 2215 mumps->id.nnz = mumps->nnz; 2216 mumps->id.irn = mumps->irn; 2217 mumps->id.jcn = mumps->jcn; 2218 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2219 if (r) { 2220 mumps->id.ICNTL(7) = 1; 2221 if (!mumps->myid) { 2222 const PetscInt *idx; 2223 PetscInt i; 2224 2225 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2226 PetscCall(ISGetIndices(r, &idx)); 2227 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2228 PetscCall(ISRestoreIndices(r, &idx)); 2229 } 2230 } 2231 } 2232 break; 2233 case 3: /* distributed assembled matrix input (size>1) */ 2234 mumps->id.nnz_loc = mumps->nnz; 2235 mumps->id.irn_loc = mumps->irn; 2236 mumps->id.jcn_loc = mumps->jcn; 2237 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2238 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
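MUMPS will then expect the complete rhs on the host (rank 0) at every solve.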
Create scatter scat_rhs for repeated use in MatSolve() */ 2239 PetscCall(MatCreateVecs(A, NULL, &b)); 2240 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2241 PetscCall(VecDestroy(&b)); 2242 } 2243 break; 2244 } 2245 PetscMUMPS_c(mumps); 2246 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2247 2248 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2249 F->ops->solve = MatSolve_MUMPS; 2250 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2251 F->ops->matsolve = MatMatSolve_MUMPS; 2252 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2253 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2254 2255 mumps->matstruc = SAME_NONZERO_PATTERN; 2256 PetscFunctionReturn(PETSC_SUCCESS); 2257 } 2258 2259 /* Note the Petsc r and c permutations are ignored */ 2260 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2261 { 2262 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2263 Vec b; 2264 const PetscInt M = A->rmap->N; 2265 2266 PetscFunctionBegin; 2267 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2268 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2269 PetscFunctionReturn(PETSC_SUCCESS); 2270 } 2271 2272 /* Set MUMPS options from the options database */ 2273 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2274 2275 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2276 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2277 2278 /* analysis phase */ 2279 mumps->id.job = JOB_FACTSYMBOLIC; 2280 mumps->id.n = M; 2281 switch (mumps->id.ICNTL(18)) { 2282 case 0: /* centralized assembled matrix input */ 2283 if (!mumps->myid) { 2284 mumps->id.nnz = mumps->nnz; 2285 mumps->id.irn = mumps->irn; 2286 mumps->id.jcn = mumps->jcn; 2287 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2288 } 2289 break; 2290 case 3: /* distributed assembled matrix input (size>1) */ 2291 mumps->id.nnz_loc = mumps->nnz; 2292 mumps->id.irn_loc = mumps->irn; 2293 mumps->id.jcn_loc = mumps->jcn; 2294 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2295 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2296 PetscCall(MatCreateVecs(A, NULL, &b)); 2297 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2298 PetscCall(VecDestroy(&b)); 2299 } 2300 break; 2301 } 2302 PetscMUMPS_c(mumps); 2303 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2304 2305 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2306 F->ops->solve = MatSolve_MUMPS; 2307 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2308 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2309 2310 mumps->matstruc = SAME_NONZERO_PATTERN; 2311 PetscFunctionReturn(PETSC_SUCCESS); 2312 } 2313 2314 /* Note the Petsc r permutation and factor info are ignored */ 2315 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2316 { 2317 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2318 Vec b; 2319 const PetscInt M = A->rmap->N; 2320 2321 PetscFunctionBegin; 2322 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2323 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2324 PetscFunctionReturn(PETSC_SUCCESS); 2325 } 2326 2327 /* Set MUMPS options from the options database */ 2328 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2329 2330 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2331 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2332 2333 /* analysis phase */ 2334 mumps->id.job = JOB_FACTSYMBOLIC; 2335 mumps->id.n = M; 2336 switch (mumps->id.ICNTL(18)) { 2337 case 0: /* centralized assembled matrix input */ 2338 if (!mumps->myid) { 2339 mumps->id.nnz = mumps->nnz; 2340 mumps->id.irn = mumps->irn; 2341 mumps->id.jcn = mumps->jcn; 2342 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2343 } 2344 break; 2345 case 3: /* distributed assembled matrix input (size>1) */ 2346 mumps->id.nnz_loc = mumps->nnz; 2347 mumps->id.irn_loc = mumps->irn; 2348 mumps->id.jcn_loc = mumps->jcn; 2349 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2350 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2351 PetscCall(MatCreateVecs(A, NULL, &b)); 2352 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2353 PetscCall(VecDestroy(&b)); 2354 } 2355 break; 2356 } 2357 PetscMUMPS_c(mumps); 2358 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2359 2360 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2361 F->ops->solve = MatSolve_MUMPS; 2362 F->ops->solvetranspose = MatSolve_MUMPS; 2363 F->ops->matsolve = MatMatSolve_MUMPS; 2364 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2365 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2366 #if defined(PETSC_USE_COMPLEX) 2367 F->ops->getinertia = NULL; 2368 #else 2369 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2370 #endif 2371 2372 mumps->matstruc = SAME_NONZERO_PATTERN; 2373 PetscFunctionReturn(PETSC_SUCCESS); 2374 } 2375 2376 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2377 { 2378 PetscBool iascii; 2379 PetscViewerFormat format; 2380 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2381 2382 PetscFunctionBegin; 2383 /* check if matrix is mumps type */ 2384 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2385 2386 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2387 if (iascii) { 2388 PetscCall(PetscViewerGetFormat(viewer, &format)); 2389 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2390 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2391 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2392 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2393 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2394 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2395 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2396 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2397 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2398 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2399 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2400 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2401 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2402 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2403 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2404 if (mumps->id.ICNTL(11) > 0) { 2405 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2406 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2407 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2408 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2409 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2410 PetscCall(PetscViewerASCIIPrintf(viewer, " 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2411 } 2412 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2413 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2414 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2415 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2416 /* ICNTL(15-17) not used */ 2417 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2418 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2419 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2420 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2421 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2422 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2423 2424 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2425 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2426 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2427 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2428 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2429 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2430 2431 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2432 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2433 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2434 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2435 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2436 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2437 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2438 2439 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2440 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2441 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2442 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2443 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2444 
PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2445 2446 /* information local to each processor */ 2447 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2448 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2449 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2450 PetscCall(PetscViewerFlush(viewer)); 2451 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2452 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2453 PetscCall(PetscViewerFlush(viewer)); 2454 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2455 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2456 PetscCall(PetscViewerFlush(viewer)); 2457 2458 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2459 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2460 PetscCall(PetscViewerFlush(viewer)); 2461 2462 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2463 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2464 PetscCall(PetscViewerFlush(viewer)); 2465 2466 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2467 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2468 PetscCall(PetscViewerFlush(viewer)); 2469 2470 if (mumps->ninfo && mumps->ninfo <= 80) { 2471 PetscInt i; 2472 for (i = 0; i < mumps->ninfo; i++) { 2473 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2474 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2475 PetscCall(PetscViewerFlush(viewer)); 2476 } 2477 } 2478 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2479 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2480 2481 if (mumps->myid == 0) { /* information from the host */ 2482 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2483 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2484 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2485 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2486 2487 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2488 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2489 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2490 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2491 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2492 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2493 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2494 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2495 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2496 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2497 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2498 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2499 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2500 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2501 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2502 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2503 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2504 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 
2505 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2506 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2507 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2508 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2509 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2510 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2511 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2512 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2513 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2514 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2515 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2516 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2517 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2518 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2519 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2520 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2521 } 2522 } 2523 } 2524 PetscFunctionReturn(PETSC_SUCCESS); 2525 } 2526 2527 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2528 { 2529 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2530 2531 PetscFunctionBegin; 2532 info->block_size = 1.0; 2533 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2534 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  mumps->id.schur      = (MumpsScalar *)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size;
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

  /* MUMPS expects Fortran style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
#endif

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always returns a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
#endif
          array[j * N + i] = val;
        }
      }
    } else { /* stored by columns */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    }
  } else {                          /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = val;
          array[j * N + i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = val;
          array[j * N + i] = val;
        }
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {                                      /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) {                             /* not already cached */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ?
mumps->ICNTL_pre[0] : 0; 2670 *ival = 0; 2671 for (i = 0; i < nICNTL_pre; ++i) { 2672 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2673 } 2674 } else *ival = mumps->id.ICNTL(icntl); 2675 PetscFunctionReturn(PETSC_SUCCESS); 2676 } 2677 2678 /*@ 2679 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2680 2681 Logically Collective 2682 2683 Input Parameters: 2684 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2685 . icntl - index of MUMPS parameter array ICNTL() 2686 - ival - value of MUMPS ICNTL(icntl) 2687 2688 Options Database Key: 2689 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2690 2691 Level: beginner 2692 2693 References: 2694 . * - MUMPS Users' Guide 2695 2696 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2697 @*/ 2698 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2699 { 2700 PetscFunctionBegin; 2701 PetscValidType(F, 1); 2702 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2703 PetscValidLogicalCollectiveInt(F, icntl, 2); 2704 PetscValidLogicalCollectiveInt(F, ival, 3); 2705 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2706 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2707 PetscFunctionReturn(PETSC_SUCCESS); 2708 } 2709 2710 /*@ 2711 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2712 2713 Logically Collective 2714 2715 Input Parameters: 2716 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2717 - icntl - index of MUMPS parameter array ICNTL() 2718 2719 Output Parameter: 2720 . ival - value of MUMPS ICNTL(icntl) 2721 2722 Level: beginner 2723 2724 References: 2725 . * - MUMPS Users' Guide 2726 2727 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2728 @*/ 2729 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2730 { 2731 PetscFunctionBegin; 2732 PetscValidType(F, 1); 2733 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2734 PetscValidLogicalCollectiveInt(F, icntl, 2); 2735 PetscValidIntPointer(ival, 3); 2736 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2737 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2738 PetscFunctionReturn(PETSC_SUCCESS); 2739 } 2740 2741 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2742 { 2743 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2744 2745 PetscFunctionBegin; 2746 if (mumps->id.job == JOB_NULL) { 2747 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0;
    for (i = 0; i < nCNTL_pre; ++i)
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
    if (i == nCNTL_pre) {
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
      mumps->CNTL_pre[0]++;
    }
    mumps->CNTL_pre[1 + 2 * i] = icntl;
    mumps->CNTL_pre[2 + 2 * i] = val;
  } else mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    *val = 0.0;
    for (i = 0; i < nCNTL_pre; ++i) {
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
    }
  } else *val = mumps->id.CNTL(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatMumpsSetCntl - Set MUMPS parameter CNTL()

   Logically Collective

   Input Parameters:
+  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array CNTL()
-  val - value of MUMPS CNTL(icntl)

   Options Database Key:
.  -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val

   Level: beginner

   References:
.  * - MUMPS Users' Guide

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveReal(F, val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatMumpsGetCntl - Get MUMPS parameter CNTL()

   Logically Collective

   Input Parameters:
+  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array CNTL()

   Output Parameter:
.  val - value of MUMPS CNTL(icntl)

   Level: beginner

   References:
.
* - MUMPS Users' Guide 2824 2825 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2826 @*/ 2827 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2828 { 2829 PetscFunctionBegin; 2830 PetscValidType(F, 1); 2831 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2832 PetscValidLogicalCollectiveInt(F, icntl, 2); 2833 PetscValidRealPointer(val, 3); 2834 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2835 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2836 PetscFunctionReturn(PETSC_SUCCESS); 2837 } 2838 2839 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2840 { 2841 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2842 2843 PetscFunctionBegin; 2844 *info = mumps->id.INFO(icntl); 2845 PetscFunctionReturn(PETSC_SUCCESS); 2846 } 2847 2848 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2849 { 2850 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2851 2852 PetscFunctionBegin; 2853 *infog = mumps->id.INFOG(icntl); 2854 PetscFunctionReturn(PETSC_SUCCESS); 2855 } 2856 2857 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2858 { 2859 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2860 2861 PetscFunctionBegin; 2862 *rinfo = mumps->id.RINFO(icntl); 2863 PetscFunctionReturn(PETSC_SUCCESS); 2864 } 2865 2866 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2867 { 2868 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2869 2870 PetscFunctionBegin; 2871 *rinfog = mumps->id.RINFOG(icntl); 2872 PetscFunctionReturn(PETSC_SUCCESS); 2873 } 2874 2875 PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2876 { 2877 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2878 2879 PetscFunctionBegin; 2880 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2881 *size = 0; 2882 *array = NULL; 2883 if (!mumps->myid) { 2884 *size = mumps->id.INFOG(28); 2885 PetscCall(PetscMalloc1(*size, array)); 2886 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2887 } 2888 PetscFunctionReturn(PETSC_SUCCESS); 2889 } 2890 2891 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2892 { 2893 Mat Bt = NULL, Btseq = NULL; 2894 PetscBool flg; 2895 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2896 PetscScalar *aa; 2897 PetscInt spnr, *ia, *ja, M, nrhs; 2898 2899 PetscFunctionBegin; 2900 PetscValidPointer(spRHS, 2); 2901 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2902 if (flg) { 2903 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2904 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2905 2906 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2907 2908 if (mumps->petsc_size > 1) { 2909 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2910 Btseq = b->A; 2911 } else { 2912 Btseq = Bt; 2913 } 2914 2915 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2916 mumps->id.nrhs = nrhs; 2917 mumps->id.lrhs = M; 2918 mumps->id.rhs = NULL; 2919 2920 if (!mumps->myid) { 2921 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2922 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, 
&spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2923 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2924 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2925 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2926 } else { 2927 mumps->id.irhs_ptr = NULL; 2928 mumps->id.irhs_sparse = NULL; 2929 mumps->id.nz_rhs = 0; 2930 mumps->id.rhs_sparse = NULL; 2931 } 2932 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2933 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2934 2935 /* solve phase */ 2936 mumps->id.job = JOB_SOLVE; 2937 PetscMUMPS_c(mumps); 2938 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 2939 2940 if (!mumps->myid) { 2941 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2942 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2943 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2944 } 2945 PetscFunctionReturn(PETSC_SUCCESS); 2946 } 2947 2948 /*@ 2949 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2950 2951 Logically Collective 2952 2953 Input Parameter: 2954 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2955 2956 Output Parameter: 2957 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2958 2959 Level: beginner 2960 2961 References: 2962 . * - MUMPS Users' Guide 2963 2964 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2965 @*/ 2966 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2967 { 2968 PetscFunctionBegin; 2969 PetscValidType(F, 1); 2970 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2971 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2972 PetscFunctionReturn(PETSC_SUCCESS); 2973 } 2974 2975 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2976 { 2977 Mat spRHS; 2978 2979 PetscFunctionBegin; 2980 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 2981 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 2982 PetscCall(MatDestroy(&spRHS)); 2983 PetscFunctionReturn(PETSC_SUCCESS); 2984 } 2985 2986 /*@ 2987 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 2988 2989 Logically Collective 2990 2991 Input Parameter: 2992 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2993 2994 Output Parameter: 2995 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 2996 2997 Level: beginner 2998 2999 References: 3000 . 
* - MUMPS Users' Guide 3001 3002 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3003 @*/ 3004 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3005 { 3006 PetscBool flg; 3007 3008 PetscFunctionBegin; 3009 PetscValidType(F, 1); 3010 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3011 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3012 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3013 3014 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3015 PetscFunctionReturn(PETSC_SUCCESS); 3016 } 3017 3018 /*@ 3019 MatMumpsGetInfo - Get MUMPS parameter INFO() 3020 3021 Logically Collective 3022 3023 Input Parameters: 3024 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3025 - icntl - index of MUMPS parameter array INFO() 3026 3027 Output Parameter: 3028 . ival - value of MUMPS INFO(icntl) 3029 3030 Level: beginner 3031 3032 References: 3033 . * - MUMPS Users' Guide 3034 3035 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3036 @*/ 3037 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3038 { 3039 PetscFunctionBegin; 3040 PetscValidType(F, 1); 3041 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3042 PetscValidIntPointer(ival, 3); 3043 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3044 PetscFunctionReturn(PETSC_SUCCESS); 3045 } 3046 3047 /*@ 3048 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3049 3050 Logically Collective 3051 3052 Input Parameters: 3053 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3054 - icntl - index of MUMPS parameter array INFOG() 3055 3056 Output Parameter: 3057 . ival - value of MUMPS INFOG(icntl) 3058 3059 Level: beginner 3060 3061 References: 3062 . * - MUMPS Users' Guide 3063 3064 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3065 @*/ 3066 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3067 { 3068 PetscFunctionBegin; 3069 PetscValidType(F, 1); 3070 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3071 PetscValidIntPointer(ival, 3); 3072 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3073 PetscFunctionReturn(PETSC_SUCCESS); 3074 } 3075 3076 /*@ 3077 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3078 3079 Logically Collective 3080 3081 Input Parameters: 3082 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3083 - icntl - index of MUMPS parameter array RINFO() 3084 3085 Output Parameter: 3086 . val - value of MUMPS RINFO(icntl) 3087 3088 Level: beginner 3089 3090 References: 3091 . 
* - MUMPS Users' Guide 3092 3093 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3094 @*/ 3095 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3096 { 3097 PetscFunctionBegin; 3098 PetscValidType(F, 1); 3099 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3100 PetscValidRealPointer(val, 3); 3101 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3102 PetscFunctionReturn(PETSC_SUCCESS); 3103 } 3104 3105 /*@ 3106 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3107 3108 Logically Collective 3109 3110 Input Parameters: 3111 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3112 - icntl - index of MUMPS parameter array RINFOG() 3113 3114 Output Parameter: 3115 . val - value of MUMPS RINFOG(icntl) 3116 3117 Level: beginner 3118 3119 References: 3120 . * - MUMPS Users' Guide 3121 3122 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3123 @*/ 3124 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3125 { 3126 PetscFunctionBegin; 3127 PetscValidType(F, 1); 3128 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3129 PetscValidRealPointer(val, 3); 3130 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3131 PetscFunctionReturn(PETSC_SUCCESS); 3132 } 3133 3134 /*@ 3135 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3136 3137 Logically Collective 3138 3139 Input Parameter: 3140 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3141 3142 Output Parameters: 3143 + size - local size of the array. The size of the array is non-zero only on the host. 3144 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3145 for freeing this array. 3146 3147 Level: beginner 3148 3149 References: 3150 . * - MUMPS Users' Guide 3151 3152 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3153 @*/ 3154 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3155 { 3156 PetscFunctionBegin; 3157 PetscValidType(F, 1); 3158 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3159 PetscValidIntPointer(size, 2); 3160 PetscValidPointer(array, 3); 3161 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3162 PetscFunctionReturn(PETSC_SUCCESS); 3163 } 3164 3165 /*MC 3166 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3167 distributed and sequential matrices via the external package MUMPS. 
   Works with `MATAIJ` and `MATSBAIJ` matrices

   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
   See details below.

   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver

   Options Database Keys:
+  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
.  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
.  -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
.  -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
.  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
.  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
                        Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
.  -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
.  -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
.  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
.  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
.  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
.  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
.  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
.  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
.  -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
.  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
.  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
.  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
.  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
.  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
.  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
.  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
.  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
.  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
.  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
.  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
.  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
.  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
.  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
.  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
.  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
-  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

   Level: beginner

   Notes:
   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc), so using it will
   error if the matrix is Hermitian.

   When used within a `KSP`/`PC` solve, the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
   the failure with
.vb
          KSPGetPC(ksp,&pc);
          PCFactorGetMatrix(pc,&mat);
          MatMumpsGetInfo(mat,....);
          MatMumpsGetInfog(mat,....); etc.
.ve
   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.

   MUMPS provides 64-bit integer support in two build modes:
   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

   Two modes to run MUMPS/PETSc with OpenMP
.vb
   Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
   threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
.ve

.vb
   -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
   if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
.ve

   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
   (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
   (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
   libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
   processes on each compute node. Listing the processes in rank-ascending order, we split processes on a node into consecutive groups of
   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We will free the CPUs assigned to slaves (which might be
   set by CPU binding policies in job scripts) and make them available to the master so that OMP threads spawned by MUMPS can run on those CPUs.
   In a multi-socket compute node, MPI rank mapping is an issue. Still using the above example, suppose your compute node has two sockets.
   If you interleave MPI ranks on the two sockets (in other words, even ranks are placed on socket 0 and odd ranks on socket 1) and bind
   MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half the cores in socket 0 and half the
   cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
   problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
   For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
   examine the mapping result.

   PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
   for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
   calls `omp_set_num_threads`(m) internally before calling MUMPS.

   References:
+  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
-  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
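
   Example Usage:
   A minimal command-line sketch of driving this solver through the options database keys listed above (the executable name, rank count, and the
   particular ICNTL values are only illustrative placeholders, not a recommendation); it selects MUMPS LU inside a `KSP` solve, raises the
   estimated working-space increase ICNTL(14) to 50 percent, and turns on print level 2 via ICNTL(4)
.vb
   mpirun -n 4 ./your_app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 50 -mat_mumps_icntl_4 2
.ve
   The same ICNTL values can instead be set programmatically with `MatMumpsSetIcntl()` on the factor matrix obtained from `PCFactorGetMatrix()`,
   as in the failure-diagnosis snippet above.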
3285 3286 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3287 M*/ 3288 3289 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3290 { 3291 PetscFunctionBegin; 3292 *type = MATSOLVERMUMPS; 3293 PetscFunctionReturn(PETSC_SUCCESS); 3294 } 3295 3296 /* MatGetFactor for Seq and MPI AIJ matrices */ 3297 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3298 { 3299 Mat B; 3300 Mat_MUMPS *mumps; 3301 PetscBool isSeqAIJ; 3302 PetscMPIInt size; 3303 3304 PetscFunctionBegin; 3305 #if defined(PETSC_USE_COMPLEX) 3306 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3307 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3308 *F = NULL; 3309 PetscFunctionReturn(PETSC_SUCCESS); 3310 } 3311 #endif 3312 /* Create the factorization matrix */ 3313 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3314 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3315 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3316 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3317 PetscCall(MatSetUp(B)); 3318 3319 PetscCall(PetscNew(&mumps)); 3320 3321 B->ops->view = MatView_MUMPS; 3322 B->ops->getinfo = MatGetInfo_MUMPS; 3323 3324 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3325 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3326 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3327 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3328 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3329 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3330 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3331 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3332 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3333 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3334 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3335 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3336 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3337 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3338 3339 if (ftype == MAT_FACTOR_LU) { 3340 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3341 B->factortype = MAT_FACTOR_LU; 3342 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3343 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3344 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 
3345 mumps->sym = 0; 3346 } else { 3347 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3348 B->factortype = MAT_FACTOR_CHOLESKY; 3349 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3350 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3351 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3352 #if defined(PETSC_USE_COMPLEX) 3353 mumps->sym = 2; 3354 #else 3355 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3356 else mumps->sym = 2; 3357 #endif 3358 } 3359 3360 /* set solvertype */ 3361 PetscCall(PetscFree(B->solvertype)); 3362 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3363 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3364 if (size == 1) { 3365 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3366 B->canuseordering = PETSC_TRUE; 3367 } 3368 B->ops->destroy = MatDestroy_MUMPS; 3369 B->data = (void *)mumps; 3370 3371 *F = B; 3372 mumps->id.job = JOB_NULL; 3373 mumps->ICNTL_pre = NULL; 3374 mumps->CNTL_pre = NULL; 3375 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3376 PetscFunctionReturn(PETSC_SUCCESS); 3377 } 3378 3379 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3380 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3381 { 3382 Mat B; 3383 Mat_MUMPS *mumps; 3384 PetscBool isSeqSBAIJ; 3385 PetscMPIInt size; 3386 3387 PetscFunctionBegin; 3388 #if defined(PETSC_USE_COMPLEX) 3389 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3390 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3391 *F = NULL; 3392 PetscFunctionReturn(PETSC_SUCCESS); 3393 } 3394 #endif 3395 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3396 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3397 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3398 PetscCall(MatSetUp(B)); 3399 3400 PetscCall(PetscNew(&mumps)); 3401 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3402 if (isSeqSBAIJ) { 3403 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3404 } else { 3405 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3406 } 3407 3408 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3409 B->ops->view = MatView_MUMPS; 3410 B->ops->getinfo = MatGetInfo_MUMPS; 3411 3412 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3413 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3414 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3415 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3416 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3417 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3418 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3419 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3420 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", 
MatMumpsGetInfog_MUMPS)); 3421 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3422 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3423 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3424 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3425 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3426 3427 B->factortype = MAT_FACTOR_CHOLESKY; 3428 #if defined(PETSC_USE_COMPLEX) 3429 mumps->sym = 2; 3430 #else 3431 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3432 else mumps->sym = 2; 3433 #endif 3434 3435 /* set solvertype */ 3436 PetscCall(PetscFree(B->solvertype)); 3437 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3438 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3439 if (size == 1) { 3440 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3441 B->canuseordering = PETSC_TRUE; 3442 } 3443 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3444 B->ops->destroy = MatDestroy_MUMPS; 3445 B->data = (void *)mumps; 3446 3447 *F = B; 3448 mumps->id.job = JOB_NULL; 3449 mumps->ICNTL_pre = NULL; 3450 mumps->CNTL_pre = NULL; 3451 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3452 PetscFunctionReturn(PETSC_SUCCESS); 3453 } 3454 3455 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3456 { 3457 Mat B; 3458 Mat_MUMPS *mumps; 3459 PetscBool isSeqBAIJ; 3460 PetscMPIInt size; 3461 3462 PetscFunctionBegin; 3463 /* Create the factorization matrix */ 3464 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3465 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3466 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3467 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3468 PetscCall(MatSetUp(B)); 3469 3470 PetscCall(PetscNew(&mumps)); 3471 if (ftype == MAT_FACTOR_LU) { 3472 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3473 B->factortype = MAT_FACTOR_LU; 3474 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3475 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3476 mumps->sym = 0; 3477 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3478 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3479 3480 B->ops->view = MatView_MUMPS; 3481 B->ops->getinfo = MatGetInfo_MUMPS; 3482 3483 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3484 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3485 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3486 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3487 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3488 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3489 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3490 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3491 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3492 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3493 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3494 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3495 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3496 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3497 3498 /* set solvertype */ 3499 PetscCall(PetscFree(B->solvertype)); 3500 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3501 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3502 if (size == 1) { 3503 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3504 B->canuseordering = PETSC_TRUE; 3505 } 3506 B->ops->destroy = MatDestroy_MUMPS; 3507 B->data = (void *)mumps; 3508 3509 *F = B; 3510 mumps->id.job = JOB_NULL; 3511 mumps->ICNTL_pre = NULL; 3512 mumps->CNTL_pre = NULL; 3513 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3514 PetscFunctionReturn(PETSC_SUCCESS); 3515 } 3516 3517 /* MatGetFactor for Seq and MPI SELL matrices */ 3518 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3519 { 3520 Mat B; 3521 Mat_MUMPS *mumps; 3522 PetscBool isSeqSELL; 3523 PetscMPIInt size; 3524 3525 PetscFunctionBegin; 3526 /* Create the factorization matrix */ 3527 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3528 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3529 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3530 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3531 PetscCall(MatSetUp(B)); 3532 3533 PetscCall(PetscNew(&mumps)); 3534 3535 B->ops->view = MatView_MUMPS; 3536 B->ops->getinfo = MatGetInfo_MUMPS; 3537 3538 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3539 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3540 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3541 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3542 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3543 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3544 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3545 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3546 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3547 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3548 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3549 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3550 3551 if (ftype == MAT_FACTOR_LU) { 3552 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3553 B->factortype = MAT_FACTOR_LU; 3554 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3555 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3556 mumps->sym = 0; 3557 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3558 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3559 3560 /* set solvertype */ 3561 PetscCall(PetscFree(B->solvertype)); 3562 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3563 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3564 if (size == 1) { 3565 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3566 B->canuseordering = PETSC_TRUE; 3567 } 3568 B->ops->destroy = MatDestroy_MUMPS; 3569 B->data = (void *)mumps; 3570 3571 *F = B; 3572 mumps->id.job = JOB_NULL; 3573 mumps->ICNTL_pre = NULL; 3574 mumps->CNTL_pre = NULL; 3575 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3576 PetscFunctionReturn(PETSC_SUCCESS); 3577 } 3578 3579 /* MatGetFactor for MATNEST matrices */ 3580 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3581 { 3582 Mat B, **mats; 3583 Mat_MUMPS *mumps; 3584 PetscInt nr, nc; 3585 PetscMPIInt size; 3586 PetscBool flg = PETSC_TRUE; 3587 3588 PetscFunctionBegin; 3589 #if defined(PETSC_USE_COMPLEX) 3590 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3591 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3592 *F = NULL; 3593 PetscFunctionReturn(PETSC_SUCCESS); 3594 } 3595 #endif 3596 3597 /* Return if some condition is not satisfied */ 3598 *F = NULL; 3599 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3600 if (ftype == MAT_FACTOR_CHOLESKY) { 3601 IS *rows, *cols; 3602 PetscInt *m, *M; 3603 3604 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3605 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3606 PetscCall(MatNestGetISs(A, rows, cols)); 3607 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3608 if (!flg) { 3609 PetscCall(PetscFree2(rows, cols)); 3610 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 3611 PetscFunctionReturn(PETSC_SUCCESS); 3612 } 3613 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3614 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3615 for (PetscInt r = 0; flg && r < nr; r++) 3616 for (PetscInt k = r + 1; flg && k < nr; k++) 3617 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3618 PetscCall(PetscFree2(m, M)); 3619 PetscCall(PetscFree2(rows, cols)); 3620 if (!flg) { 3621 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. 
Use MAT_FACTOR_LU.\n")); 3622 PetscFunctionReturn(PETSC_SUCCESS); 3623 } 3624 } 3625 3626 for (PetscInt r = 0; r < nr; r++) { 3627 for (PetscInt c = 0; c < nc; c++) { 3628 Mat sub = mats[r][c]; 3629 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans; 3630 3631 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3632 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 3633 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 3634 else { 3635 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 3636 if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 3637 } 3638 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3639 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3640 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3641 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3642 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3643 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3644 if (ftype == MAT_FACTOR_CHOLESKY) { 3645 if (r == c && !isSeqAIJ && !isMPIAIJ && !isSeqSBAIJ && !isMPISBAIJ) { 3646 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3647 flg = PETSC_FALSE; 3648 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3649 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3650 flg = PETSC_FALSE; 3651 } 3652 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3653 PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3654 flg = PETSC_FALSE; 3655 } 3656 } 3657 } 3658 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3659 3660 /* Create the factorization matrix */ 3661 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3662 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3663 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3664 PetscCall(MatSetUp(B)); 3665 3666 PetscCall(PetscNew(&mumps)); 3667 3668 B->ops->view = MatView_MUMPS; 3669 B->ops->getinfo = MatGetInfo_MUMPS; 3670 3671 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3672 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3673 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3674 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3675 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3676 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3677 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3678 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3679 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3680 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3681 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3682 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3683 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3684 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3685 3686 if (ftype == MAT_FACTOR_LU) { 3687 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3688 B->factortype = MAT_FACTOR_LU; 3689 mumps->sym = 0; 3690 } else { 3691 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3692 B->factortype = MAT_FACTOR_CHOLESKY; 3693 #if defined(PETSC_USE_COMPLEX) 3694 mumps->sym = 2; 3695 #else 3696 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3697 else mumps->sym = 2; 3698 #endif 3699 } 3700 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3701 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3702 3703 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3704 if (size == 1) { 3705 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3706 B->canuseordering = PETSC_TRUE; 3707 } 3708 3709 /* set solvertype */ 3710 PetscCall(PetscFree(B->solvertype)); 3711 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3712 B->ops->destroy = MatDestroy_MUMPS; 3713 B->data = (void *)mumps; 3714 3715 *F = B; 3716 mumps->id.job = JOB_NULL; 3717 mumps->ICNTL_pre = NULL; 3718 mumps->CNTL_pre = NULL; 3719 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3720 PetscFunctionReturn(PETSC_SUCCESS); 3721 } 3722 3723 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3724 { 3725 PetscFunctionBegin; 3726 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3727 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3728 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3729 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3730 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3731 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3732 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3733 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3734 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3735 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3736 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3737 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3738 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 3739 PetscFunctionReturn(PETSC_SUCCESS); 3740 } 3741