/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

#define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <cmumps_c.h>
  #else
    #include <zmumps_c.h>
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <smumps_c.h>
  #else
    #include <dmumps_c.h>
  #endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c cmumps_c
  #else
    #define MUMPS_c zmumps_c
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c smumps_c
  #else
    #define MUMPS_c dmumps_c
  #endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and int64_t for
   numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt, etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in the MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast a PetscCount to a PetscMUMPSInt. Usually there is no overflow since <a> is a row/col index or some other small integer */
static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)a;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;

  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's.
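     Reading into a temporary PetscInt and passing PETSC_MUMPS_INT_MIN/PETSC_MUMPS_INT_MAX as bounds lets the generic
     option handling reject values that do not fit in the 32-bit MUMPS integer before the narrowing cast below.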
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 88 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 89 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 90 if (set) *set = myset; 91 PetscFunctionReturn(PETSC_SUCCESS); 92 } 93 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 94 95 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 96 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 97 #define PetscMUMPS_c(mumps) \ 98 do { \ 99 if (mumps->use_petsc_omp_support) { \ 100 if (mumps->is_omp_master) { \ 101 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 102 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 103 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 104 PetscCall(PetscFPTrapPop()); \ 105 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 106 } \ 107 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 108 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 109 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 110 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 111 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 112 */ \ 113 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 114 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 115 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 116 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 117 } else { \ 118 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 119 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 120 PetscCall(PetscFPTrapPop()); \ 121 } \ 122 } while (0) 123 #else 124 #define PetscMUMPS_c(mumps) \ 125 do { \ 126 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 127 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 128 PetscCall(PetscFPTrapPop()); \ 129 } while (0) 130 #endif 131 132 /* declare MumpsScalar */ 133 #if defined(PETSC_USE_COMPLEX) 134 #if defined(PETSC_USE_REAL_SINGLE) 135 #define MumpsScalar mumps_complex 136 #else 137 #define MumpsScalar mumps_double_complex 138 #endif 139 #else 140 #define MumpsScalar PetscScalar 141 #endif 142 143 /* macros s.t. indices match MUMPS documentation */ 144 #define ICNTL(I) icntl[(I) - 1] 145 #define CNTL(I) cntl[(I) - 1] 146 #define INFOG(I) infog[(I) - 1] 147 #define INFO(I) info[(I) - 1] 148 #define RINFOG(I) rinfog[(I) - 1] 149 #define RINFO(I) rinfo[(I) - 1] 150 151 typedef struct Mat_MUMPS Mat_MUMPS; 152 struct Mat_MUMPS { 153 #if defined(PETSC_USE_COMPLEX) 154 #if defined(PETSC_USE_REAL_SINGLE) 155 CMUMPS_STRUC_C id; 156 #else 157 ZMUMPS_STRUC_C id; 158 #endif 159 #else 160 #if defined(PETSC_USE_REAL_SINGLE) 161 SMUMPS_STRUC_C id; 162 #else 163 DMUMPS_STRUC_C id; 164 #endif 165 #endif 166 167 MatStructure matstruc; 168 PetscMPIInt myid, petsc_size; 169 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. 
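irn/jcn hold the row and column indices and val (next field) the matching entries, i.e. the assembled COO format MUMPS takes as input;
                                           indices are 1-based (Fortran convention) on the MUMPS side, hence the shift argument of the ConvertToTriples routines below.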
*/
  PetscScalar  *val, *val_alloc; /* For some matrices we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc. */
  PetscCount    nnz;             /* number of nonzeros. The type is called selective 64-bit in MUMPS */
  PetscMUMPSInt sym;
  MPI_Comm      mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt ICNTL9_pre;         /* check if ICNTL(9) is changed from the previous MatSolve */
  VecScatter    scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR structure of a sparse RHS */
  PetscCount     cur_ilen, cur_jlen;  /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscCount  *nest_vals_start;
  PetscScalar *nest_vals;

  /* fields used by the PETSc/MUMPS OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller through which blocked processes release their CPUs (MPI_Barrier does not give this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is the PETSc matrix's communicator */
  PetscCount  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] holds the column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices.
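For a 1-based CSR the row pointer entry ia[nrow] points one past the last entry, so the local number of entries is ia[nrow] - 1.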
Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */ 215 216 PetscFunctionBegin; 217 #if defined(PETSC_USE_64BIT_INDICES) 218 { 219 PetscInt i; 220 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 221 PetscCall(PetscFree(mumps->ia_alloc)); 222 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 223 mumps->cur_ilen = nrow + 1; 224 } 225 if (nnz > mumps->cur_jlen) { 226 PetscCall(PetscFree(mumps->ja_alloc)); 227 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 228 mumps->cur_jlen = nnz; 229 } 230 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i])); 231 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i])); 232 *ia_mumps = mumps->ia_alloc; 233 *ja_mumps = mumps->ja_alloc; 234 } 235 #else 236 *ia_mumps = ia; 237 *ja_mumps = ja; 238 #endif 239 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 240 PetscFunctionReturn(PETSC_SUCCESS); 241 } 242 243 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 244 { 245 PetscFunctionBegin; 246 PetscCall(PetscFree(mumps->id.listvar_schur)); 247 PetscCall(PetscFree(mumps->id.redrhs)); 248 PetscCall(PetscFree(mumps->schur_sol)); 249 mumps->id.size_schur = 0; 250 mumps->id.schur_lld = 0; 251 mumps->id.ICNTL(19) = 0; 252 PetscFunctionReturn(PETSC_SUCCESS); 253 } 254 255 /* solve with rhs in mumps->id.redrhs and return in the same location */ 256 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 257 { 258 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 259 Mat S, B, X; 260 MatFactorSchurStatus schurstatus; 261 PetscInt sizesol; 262 263 PetscFunctionBegin; 264 PetscCall(MatFactorFactorizeSchurComplement(F)); 265 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 266 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 267 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 268 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 269 PetscCall(MatBindToCPU(B, S->boundtocpu)); 270 #endif 271 switch (schurstatus) { 272 case MAT_FACTOR_SCHUR_FACTORED: 273 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 274 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 275 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 276 PetscCall(MatBindToCPU(X, S->boundtocpu)); 277 #endif 278 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 279 PetscCall(MatMatSolveTranspose(S, B, X)); 280 } else { 281 PetscCall(MatMatSolve(S, B, X)); 282 } 283 break; 284 case MAT_FACTOR_SCHUR_INVERTED: 285 sizesol = mumps->id.nrhs * mumps->id.size_schur; 286 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 287 PetscCall(PetscFree(mumps->schur_sol)); 288 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 289 mumps->schur_sizesol = sizesol; 290 } 291 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 292 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 293 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 294 PetscCall(MatBindToCPU(X, S->boundtocpu)); 295 #endif 296 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 297 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 298 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 299 } else { 300 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 301 } 302 PetscCall(MatProductSetFromOptions(X)); 303 PetscCall(MatProductSymbolic(X)); 304 PetscCall(MatProductNumeric(X)); 305 306 
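/* When the Schur complement has been kept explicitly inverted (MAT_FACTOR_SCHUR_INVERTED), the reduced solve is just a product:
       X = S_inv * B, or S_inv^T * B when ICNTL(9) requests a transpose solve, computed above with MatProduct; the result is copied
       back into id.redrhs below so that MUMPS can use it during the expansion phase */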
PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 307 break; 308 default: 309 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 310 } 311 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 312 PetscCall(MatDestroy(&B)); 313 PetscCall(MatDestroy(&X)); 314 PetscFunctionReturn(PETSC_SUCCESS); 315 } 316 317 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 318 { 319 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 320 321 PetscFunctionBegin; 322 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 if (!expansion) { /* prepare for the condensation step */ 326 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 327 /* allocate MUMPS internal array to store reduced right-hand sides */ 328 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 329 PetscCall(PetscFree(mumps->id.redrhs)); 330 mumps->id.lredrhs = mumps->id.size_schur; 331 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 332 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 333 } 334 } else { /* prepare for the expansion step */ 335 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 336 PetscCall(MatMumpsSolveSchur_Private(F)); 337 mumps->id.ICNTL(26) = 2; /* expansion phase */ 338 PetscMUMPS_c(mumps); 339 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 340 /* restore defaults */ 341 mumps->id.ICNTL(26) = -1; 342 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 343 if (mumps->id.nrhs > 1) { 344 PetscCall(PetscFree(mumps->id.redrhs)); 345 mumps->id.lredrhs = 0; 346 mumps->sizeredrhs = 0; 347 } 348 } 349 PetscFunctionReturn(PETSC_SUCCESS); 350 } 351 352 /* 353 MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz] 354 355 input: 356 A - matrix in aij,baij or sbaij format 357 shift - 0: C style output triple; 1: Fortran style output triple. 358 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 359 MAT_REUSE_MATRIX: only the values in v array are updated 360 output: 361 nnz - dim of r, c, and v (number of local nonzero entries of A) 362 r, c, v - row and col index, matrix values (matrix triples) 363 364 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 365 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 366 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
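   As a tiny example (Fortran-style output, shift = 1), the 2x2 matrix [2 3; 0 4] produces nnz = 3 with r = {1, 1, 2}, c = {1, 2, 2}, v = {2, 3, 4}.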
367 368 */ 369 370 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 371 { 372 const PetscScalar *av; 373 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 374 PetscCount nz, rnz, k; 375 PetscMUMPSInt *row, *col; 376 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 377 378 PetscFunctionBegin; 379 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 380 if (reuse == MAT_INITIAL_MATRIX) { 381 nz = aa->nz; 382 ai = aa->i; 383 aj = aa->j; 384 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 385 for (PetscCount i = k = 0; i < M; i++) { 386 rnz = ai[i + 1] - ai[i]; 387 ajj = aj + ai[i]; 388 for (PetscCount j = 0; j < rnz; j++) { 389 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 390 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 391 k++; 392 } 393 } 394 mumps->val = (PetscScalar *)av; 395 mumps->irn = row; 396 mumps->jcn = col; 397 mumps->nnz = nz; 398 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */ 399 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 400 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 401 PetscFunctionReturn(PETSC_SUCCESS); 402 } 403 404 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 405 { 406 PetscCount nz, i, j, k, r; 407 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 408 PetscMUMPSInt *row, *col; 409 410 PetscFunctionBegin; 411 nz = a->sliidx[a->totalslices]; 412 if (reuse == MAT_INITIAL_MATRIX) { 413 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 414 for (i = k = 0; i < a->totalslices; i++) { 415 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 416 } 417 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 418 mumps->irn = row; 419 mumps->jcn = col; 420 mumps->nnz = nz; 421 mumps->val = a->val; 422 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */ 423 else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 424 PetscFunctionReturn(PETSC_SUCCESS); 425 } 426 427 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 428 { 429 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 430 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 431 PetscCount M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 432 PetscInt bs; 433 PetscMUMPSInt *row, *col; 434 435 PetscFunctionBegin; 436 if (reuse == MAT_INITIAL_MATRIX) { 437 PetscCall(MatGetBlockSize(A, &bs)); 438 M = A->rmap->N / bs; 439 ai = aa->i; 440 aj = aa->j; 441 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 442 for (i = 0; i < M; i++) { 443 ajj = aj + ai[i]; 444 rnz = ai[i + 1] - ai[i]; 445 for (k = 0; k < rnz; k++) { 446 for (j = 0; j < bs; j++) { 447 for (m = 0; m < bs; m++) { 448 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 449 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 450 idx++; 451 } 452 } 453 } 454 } 455 mumps->irn = row; 456 mumps->jcn = col; 457 mumps->nnz = nz; 458 mumps->val = aa->a; 459 } else if (mumps->nest_vals) 
PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 460 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 461 PetscFunctionReturn(PETSC_SUCCESS); 462 } 463 464 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 465 { 466 const PetscInt *ai, *aj, *ajj; 467 PetscInt bs; 468 PetscCount nz, rnz, i, j, k, m; 469 PetscMUMPSInt *row, *col; 470 PetscScalar *val; 471 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 472 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 473 #if defined(PETSC_USE_COMPLEX) 474 PetscBool isset, hermitian; 475 #endif 476 477 PetscFunctionBegin; 478 #if defined(PETSC_USE_COMPLEX) 479 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 480 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 481 #endif 482 ai = aa->i; 483 aj = aa->j; 484 PetscCall(MatGetBlockSize(A, &bs)); 485 if (reuse == MAT_INITIAL_MATRIX) { 486 const PetscCount alloc_size = aa->nz * bs2; 487 488 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 489 if (bs > 1) { 490 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 491 mumps->val = mumps->val_alloc; 492 } else { 493 mumps->val = aa->a; 494 } 495 mumps->irn = row; 496 mumps->jcn = col; 497 } else { 498 row = mumps->irn; 499 col = mumps->jcn; 500 } 501 val = mumps->val; 502 503 nz = 0; 504 if (bs > 1) { 505 for (i = 0; i < mbs; i++) { 506 rnz = ai[i + 1] - ai[i]; 507 ajj = aj + ai[i]; 508 for (j = 0; j < rnz; j++) { 509 for (k = 0; k < bs; k++) { 510 for (m = 0; m < bs; m++) { 511 if (ajj[j] > i || k >= m) { 512 if (reuse == MAT_INITIAL_MATRIX) { 513 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 514 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 515 } 516 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 517 } 518 } 519 } 520 } 521 } 522 } else if (reuse == MAT_INITIAL_MATRIX) { 523 for (i = 0; i < mbs; i++) { 524 rnz = ai[i + 1] - ai[i]; 525 ajj = aj + ai[i]; 526 for (j = 0; j < rnz; j++) { 527 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 528 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 529 nz++; 530 } 531 } 532 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz); 533 } else if (mumps->nest_vals) 534 PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 535 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 536 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 537 PetscFunctionReturn(PETSC_SUCCESS); 538 } 539 540 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 541 { 542 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 543 PetscCount nz, rnz, i, j; 544 const PetscScalar *av, *v1; 545 PetscScalar *val; 546 PetscMUMPSInt *row, *col; 547 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 548 PetscBool missing; 549 #if defined(PETSC_USE_COMPLEX) 550 PetscBool hermitian, isset; 551 #endif 552 553 PetscFunctionBegin; 554 #if 
defined(PETSC_USE_COMPLEX) 555 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 556 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 557 #endif 558 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 559 ai = aa->i; 560 aj = aa->j; 561 adiag = aa->diag; 562 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 563 if (reuse == MAT_INITIAL_MATRIX) { 564 /* count nz in the upper triangular part of A */ 565 nz = 0; 566 if (missing) { 567 for (i = 0; i < M; i++) { 568 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 569 for (j = ai[i]; j < ai[i + 1]; j++) { 570 if (aj[j] < i) continue; 571 nz++; 572 } 573 } else { 574 nz += ai[i + 1] - adiag[i]; 575 } 576 } 577 } else { 578 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 579 } 580 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 581 PetscCall(PetscMalloc1(nz, &val)); 582 mumps->nnz = nz; 583 mumps->irn = row; 584 mumps->jcn = col; 585 mumps->val = mumps->val_alloc = val; 586 587 nz = 0; 588 if (missing) { 589 for (i = 0; i < M; i++) { 590 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 591 for (j = ai[i]; j < ai[i + 1]; j++) { 592 if (aj[j] < i) continue; 593 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 594 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 595 val[nz] = av[j]; 596 nz++; 597 } 598 } else { 599 rnz = ai[i + 1] - adiag[i]; 600 ajj = aj + adiag[i]; 601 v1 = av + adiag[i]; 602 for (j = 0; j < rnz; j++) { 603 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 604 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 605 val[nz++] = v1[j]; 606 } 607 } 608 } 609 } else { 610 for (i = 0; i < M; i++) { 611 rnz = ai[i + 1] - adiag[i]; 612 ajj = aj + adiag[i]; 613 v1 = av + adiag[i]; 614 for (j = 0; j < rnz; j++) { 615 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 616 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 617 val[nz++] = v1[j]; 618 } 619 } 620 } 621 } else { 622 nz = 0; 623 val = mumps->val; 624 if (missing) { 625 for (i = 0; i < M; i++) { 626 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 627 for (j = ai[i]; j < ai[i + 1]; j++) { 628 if (aj[j] < i) continue; 629 val[nz++] = av[j]; 630 } 631 } else { 632 rnz = ai[i + 1] - adiag[i]; 633 v1 = av + adiag[i]; 634 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 635 } 636 } 637 } else { 638 for (i = 0; i < M; i++) { 639 rnz = ai[i + 1] - adiag[i]; 640 v1 = av + adiag[i]; 641 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 642 } 643 } 644 } 645 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 646 PetscFunctionReturn(PETSC_SUCCESS); 647 } 648 649 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 650 { 651 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 652 PetscInt bs; 653 PetscCount rstart, nz, i, j, k, m, jj, irow, countA, countB; 654 PetscMUMPSInt *row, *col; 655 const PetscScalar *av, *bv, *v1, *v2; 656 PetscScalar *val; 657 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 658 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data; 659 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 660 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 661 #if defined(PETSC_USE_COMPLEX) 662 PetscBool hermitian, isset; 663 #endif 664 665 PetscFunctionBegin; 666 #if defined(PETSC_USE_COMPLEX) 667 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 668 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 669 #endif 670 
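/* An MPISBAIJ matrix stores its local diagonal block as SeqSBAIJ (mat->A) and its off-diagonal block as SeqBAIJ (mat->B);
     both are unrolled below into scalar triples, and for bs > 1 only the entries on or above the diagonal of each block are kept,
     so that a single triangle of the symmetric matrix is passed to MUMPS */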
PetscCall(MatGetBlockSize(A, &bs)); 671 rstart = A->rmap->rstart; 672 ai = aa->i; 673 aj = aa->j; 674 bi = bb->i; 675 bj = bb->j; 676 av = aa->a; 677 bv = bb->a; 678 679 garray = mat->garray; 680 681 if (reuse == MAT_INITIAL_MATRIX) { 682 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 683 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 684 PetscCall(PetscMalloc1(nz, &val)); 685 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 686 mumps->irn = row; 687 mumps->jcn = col; 688 mumps->val = mumps->val_alloc = val; 689 } else { 690 val = mumps->val; 691 } 692 693 jj = 0; 694 irow = rstart; 695 for (i = 0; i < mbs; i++) { 696 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 697 countA = ai[i + 1] - ai[i]; 698 countB = bi[i + 1] - bi[i]; 699 bjj = bj + bi[i]; 700 v1 = av + ai[i] * bs2; 701 v2 = bv + bi[i] * bs2; 702 703 if (bs > 1) { 704 /* A-part */ 705 for (j = 0; j < countA; j++) { 706 for (k = 0; k < bs; k++) { 707 for (m = 0; m < bs; m++) { 708 if (rstart + ajj[j] * bs > irow || k >= m) { 709 if (reuse == MAT_INITIAL_MATRIX) { 710 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 711 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 712 } 713 val[jj++] = v1[j * bs2 + m + k * bs]; 714 } 715 } 716 } 717 } 718 719 /* B-part */ 720 for (j = 0; j < countB; j++) { 721 for (k = 0; k < bs; k++) { 722 for (m = 0; m < bs; m++) { 723 if (reuse == MAT_INITIAL_MATRIX) { 724 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 725 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 726 } 727 val[jj++] = v2[j * bs2 + m + k * bs]; 728 } 729 } 730 } 731 } else { 732 /* A-part */ 733 for (j = 0; j < countA; j++) { 734 if (reuse == MAT_INITIAL_MATRIX) { 735 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 736 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 737 } 738 val[jj++] = v1[j]; 739 } 740 741 /* B-part */ 742 for (j = 0; j < countB; j++) { 743 if (reuse == MAT_INITIAL_MATRIX) { 744 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 745 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 746 } 747 val[jj++] = v2[j]; 748 } 749 } 750 irow += bs; 751 } 752 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 753 PetscFunctionReturn(PETSC_SUCCESS); 754 } 755 756 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 757 { 758 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 759 PetscCount rstart, cstart, nz, i, j, jj, irow, countA, countB; 760 PetscMUMPSInt *row, *col; 761 const PetscScalar *av, *bv, *v1, *v2; 762 PetscScalar *val; 763 Mat Ad, Ao; 764 Mat_SeqAIJ *aa; 765 Mat_SeqAIJ *bb; 766 767 PetscFunctionBegin; 768 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 769 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 770 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 771 772 aa = (Mat_SeqAIJ *)Ad->data; 773 bb = (Mat_SeqAIJ *)Ao->data; 774 ai = aa->i; 775 aj = aa->j; 776 bi = bb->i; 777 bj = bb->j; 778 779 rstart = A->rmap->rstart; 780 cstart = A->cmap->rstart; 781 782 if (reuse == MAT_INITIAL_MATRIX) { 783 nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 784 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 785 PetscCall(PetscMalloc1(nz, &val)); 786 mumps->nnz = nz; 787 mumps->irn = row; 788 mumps->jcn = col; 789 mumps->val = mumps->val_alloc = val; 790 } else { 791 val = mumps->val; 792 } 793 794 jj = 0; 795 irow = rstart; 796 for (i = 0; i < m; i++) { 797 ajj = aj + ai[i]; 
/* ptr to the beginning of this row */ 798 countA = ai[i + 1] - ai[i]; 799 countB = bi[i + 1] - bi[i]; 800 bjj = bj + bi[i]; 801 v1 = av + ai[i]; 802 v2 = bv + bi[i]; 803 804 /* A-part */ 805 for (j = 0; j < countA; j++) { 806 if (reuse == MAT_INITIAL_MATRIX) { 807 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 808 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 809 } 810 val[jj++] = v1[j]; 811 } 812 813 /* B-part */ 814 for (j = 0; j < countB; j++) { 815 if (reuse == MAT_INITIAL_MATRIX) { 816 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 817 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 818 } 819 val[jj++] = v2[j]; 820 } 821 irow++; 822 } 823 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 824 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 825 PetscFunctionReturn(PETSC_SUCCESS); 826 } 827 828 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 829 { 830 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 831 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)mat->A->data; 832 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 833 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 834 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 835 const PetscInt bs2 = mat->bs2; 836 PetscInt bs; 837 PetscCount nz, i, j, k, n, jj, irow, countA, countB, idx; 838 PetscMUMPSInt *row, *col; 839 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 840 PetscScalar *val; 841 842 PetscFunctionBegin; 843 PetscCall(MatGetBlockSize(A, &bs)); 844 if (reuse == MAT_INITIAL_MATRIX) { 845 nz = bs2 * (aa->nz + bb->nz); 846 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 847 PetscCall(PetscMalloc1(nz, &val)); 848 mumps->nnz = nz; 849 mumps->irn = row; 850 mumps->jcn = col; 851 mumps->val = mumps->val_alloc = val; 852 } else { 853 val = mumps->val; 854 } 855 856 jj = 0; 857 irow = rstart; 858 for (i = 0; i < mbs; i++) { 859 countA = ai[i + 1] - ai[i]; 860 countB = bi[i + 1] - bi[i]; 861 ajj = aj + ai[i]; 862 bjj = bj + bi[i]; 863 v1 = av + bs2 * ai[i]; 864 v2 = bv + bs2 * bi[i]; 865 866 idx = 0; 867 /* A-part */ 868 for (k = 0; k < countA; k++) { 869 for (j = 0; j < bs; j++) { 870 for (n = 0; n < bs; n++) { 871 if (reuse == MAT_INITIAL_MATRIX) { 872 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 873 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 874 } 875 val[jj++] = v1[idx++]; 876 } 877 } 878 } 879 880 idx = 0; 881 /* B-part */ 882 for (k = 0; k < countB; k++) { 883 for (j = 0; j < bs; j++) { 884 for (n = 0; n < bs; n++) { 885 if (reuse == MAT_INITIAL_MATRIX) { 886 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 887 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 888 } 889 val[jj++] = v2[idx++]; 890 } 891 } 892 } 893 irow += bs; 894 } 895 PetscFunctionReturn(PETSC_SUCCESS); 896 } 897 898 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 899 { 900 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 901 PetscCount rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 902 PetscMUMPSInt *row, *col; 903 const PetscScalar *av, *bv, *v1, *v2; 904 PetscScalar *val; 905 Mat Ad, Ao; 906 Mat_SeqAIJ *aa; 907 Mat_SeqAIJ *bb; 908 #if defined(PETSC_USE_COMPLEX) 909 PetscBool hermitian, isset; 910 #endif 911 912 PetscFunctionBegin; 913 #if defined(PETSC_USE_COMPLEX) 914 PetscCall(MatIsHermitianKnown(A, &isset, 
&hermitian)); 915 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 916 #endif 917 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 918 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 919 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 920 921 aa = (Mat_SeqAIJ *)Ad->data; 922 bb = (Mat_SeqAIJ *)Ao->data; 923 ai = aa->i; 924 aj = aa->j; 925 adiag = aa->diag; 926 bi = bb->i; 927 bj = bb->j; 928 929 rstart = A->rmap->rstart; 930 931 if (reuse == MAT_INITIAL_MATRIX) { 932 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 933 nzb = 0; /* num of upper triangular entries in mat->B */ 934 for (i = 0; i < m; i++) { 935 nza += (ai[i + 1] - adiag[i]); 936 countB = bi[i + 1] - bi[i]; 937 bjj = bj + bi[i]; 938 for (j = 0; j < countB; j++) { 939 if (garray[bjj[j]] > rstart) nzb++; 940 } 941 } 942 943 nz = nza + nzb; /* total nz of upper triangular part of mat */ 944 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 945 PetscCall(PetscMalloc1(nz, &val)); 946 mumps->nnz = nz; 947 mumps->irn = row; 948 mumps->jcn = col; 949 mumps->val = mumps->val_alloc = val; 950 } else { 951 val = mumps->val; 952 } 953 954 jj = 0; 955 irow = rstart; 956 for (i = 0; i < m; i++) { 957 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 958 v1 = av + adiag[i]; 959 countA = ai[i + 1] - adiag[i]; 960 countB = bi[i + 1] - bi[i]; 961 bjj = bj + bi[i]; 962 v2 = bv + bi[i]; 963 964 /* A-part */ 965 for (j = 0; j < countA; j++) { 966 if (reuse == MAT_INITIAL_MATRIX) { 967 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 968 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 969 } 970 val[jj++] = v1[j]; 971 } 972 973 /* B-part */ 974 for (j = 0; j < countB; j++) { 975 if (garray[bjj[j]] > rstart) { 976 if (reuse == MAT_INITIAL_MATRIX) { 977 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 978 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 979 } 980 val[jj++] = v2[j]; 981 } 982 } 983 irow++; 984 } 985 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 986 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 987 PetscFunctionReturn(PETSC_SUCCESS); 988 } 989 990 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 991 { 992 const PetscScalar *av; 993 const PetscInt M = A->rmap->n; 994 PetscCount i; 995 PetscMUMPSInt *row, *col; 996 Vec v; 997 998 PetscFunctionBegin; 999 PetscCall(MatDiagonalGetDiagonal(A, &v)); 1000 PetscCall(VecGetArrayRead(v, &av)); 1001 if (reuse == MAT_INITIAL_MATRIX) { 1002 PetscCall(PetscMalloc2(M, &row, M, &col)); 1003 for (i = 0; i < M; i++) { 1004 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1005 col[i] = row[i]; 1006 } 1007 mumps->val = (PetscScalar *)av; 1008 mumps->irn = row; 1009 mumps->jcn = col; 1010 mumps->nnz = M; 1011 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1012 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1013 PetscCall(VecRestoreArrayRead(v, &av)); 1014 PetscFunctionReturn(PETSC_SUCCESS); 1015 } 1016 1017 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1018 { 1019 PetscScalar *v; 1020 const PetscInt m = 
A->rmap->n, N = A->cmap->N; 1021 PetscInt lda; 1022 PetscCount i, j; 1023 PetscMUMPSInt *row, *col; 1024 1025 PetscFunctionBegin; 1026 PetscCall(MatDenseGetArray(A, &v)); 1027 PetscCall(MatDenseGetLDA(A, &lda)); 1028 if (reuse == MAT_INITIAL_MATRIX) { 1029 PetscCall(PetscMalloc2(m * N, &row, m * N, &col)); 1030 for (i = 0; i < m; i++) { 1031 col[i] = 0; 1032 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1033 } 1034 for (j = 1; j < N; j++) { 1035 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j)); 1036 PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m)); 1037 } 1038 if (lda == m) mumps->val = v; 1039 else { 1040 PetscCall(PetscMalloc1(m * N, &mumps->val)); 1041 mumps->val_alloc = mumps->val; 1042 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1043 } 1044 mumps->irn = row; 1045 mumps->jcn = col; 1046 mumps->nnz = m * N; 1047 } else { 1048 if (lda == m && !mumps->nest_vals) mumps->val = v; 1049 else { 1050 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1051 } 1052 } 1053 PetscCall(MatDenseRestoreArray(A, &v)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a 1058 // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated 1059 // and its rows and columns permuted 1060 // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest() 1061 static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap) 1062 { 1063 Mat A; 1064 PetscScalar s[2]; 1065 PetscBool isTrans, isHTrans, compare; 1066 1067 PetscFunctionBegin; 1068 do { 1069 PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1070 if (isTrans) { 1071 PetscCall(MatTransposeGetMat(*sub, &A)); 1072 isHTrans = PETSC_FALSE; 1073 } else { 1074 PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1075 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A)); 1076 } 1077 compare = (PetscBool)(isTrans || isHTrans); 1078 if (compare) { 1079 if (vshift && vscale) { 1080 PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 1081 if (!*conjugate) { 1082 *vshift += s[0] * *vscale; 1083 *vscale *= s[1]; 1084 } else { 1085 *vshift += PetscConj(s[0]) * *vscale; 1086 *vscale *= PetscConj(s[1]); 1087 } 1088 } 1089 if (swap) *swap = (PetscBool)!*swap; 1090 if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate; 1091 *sub = A; 1092 } 1093 } while (compare); 1094 PetscFunctionReturn(PETSC_SUCCESS); 1095 } 1096 1097 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1098 { 1099 Mat **mats; 1100 PetscInt nr, nc; 1101 PetscBool chol = mumps->sym ? 
PETSC_TRUE : PETSC_FALSE; 1102 1103 PetscFunctionBegin; 1104 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1105 if (reuse == MAT_INITIAL_MATRIX) { 1106 PetscMUMPSInt *irns, *jcns; 1107 PetscScalar *vals; 1108 PetscCount totnnz, cumnnz, maxnnz; 1109 PetscInt *pjcns_w, Mbs = 0; 1110 IS *rows, *cols; 1111 PetscInt **rows_idx, **cols_idx; 1112 1113 cumnnz = 0; 1114 maxnnz = 0; 1115 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1116 for (PetscInt r = 0; r < nr; r++) { 1117 for (PetscInt c = 0; c < nc; c++) { 1118 Mat sub = mats[r][c]; 1119 1120 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1121 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1122 if (sub) { 1123 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1124 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 1125 MatInfo info; 1126 1127 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 1128 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1129 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1130 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1131 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1132 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1133 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1134 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1135 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 1136 1137 if (chol) { 1138 if (r == c) { 1139 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1140 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1141 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1142 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1143 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1144 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1145 } else { 1146 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1147 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1148 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1149 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1150 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1151 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1152 } 1153 } else { 1154 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1155 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1156 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1157 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1158 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1159 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1160 } 1161 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1162 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1163 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1164 cumnnz += 
(PetscCount)info.nz_used; /* can be overestimated for Cholesky */ 1165 maxnnz = PetscMax(maxnnz, info.nz_used); 1166 } 1167 } 1168 } 1169 1170 /* Allocate total COO */ 1171 totnnz = cumnnz; 1172 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1173 PetscCall(PetscMalloc1(totnnz, &vals)); 1174 1175 /* Handle rows and column maps 1176 We directly map rows and use an SF for the columns */ 1177 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1178 PetscCall(MatNestGetISs(A, rows, cols)); 1179 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1180 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1181 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1182 else (void)maxnnz; 1183 1184 cumnnz = 0; 1185 for (PetscInt r = 0; r < nr; r++) { 1186 for (PetscInt c = 0; c < nc; c++) { 1187 Mat sub = mats[r][c]; 1188 const PetscInt *ridx = rows_idx[r]; 1189 const PetscInt *cidx = cols_idx[c]; 1190 PetscScalar vscale = 1.0, vshift = 0.0; 1191 PetscInt rst, size, bs; 1192 PetscSF csf; 1193 PetscBool conjugate = PETSC_FALSE, swap = PETSC_FALSE; 1194 PetscLayout cmap; 1195 PetscInt innz; 1196 1197 mumps->nest_vals_start[r * nc + c] = cumnnz; 1198 if (c == r) { 1199 PetscCall(ISGetSize(rows[r], &size)); 1200 if (!mumps->nest_convert_to_triples[r * nc + c]) { 1201 for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 1202 } 1203 PetscCall(MatGetBlockSize(sub, &bs)); 1204 Mbs += size / bs; 1205 } 1206 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1207 1208 /* Extract inner blocks if needed */ 1209 PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap)); 1210 PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 1211 1212 /* Get column layout to map off-process columns */ 1213 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1214 1215 /* Get row start to map on-process rows */ 1216 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1217 1218 /* Directly use the mumps datastructure and use C ordering for now */ 1219 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1220 1221 /* Swap the role of rows and columns indices for transposed blocks 1222 since we need values with global final ordering */ 1223 if (swap) { 1224 cidx = rows_idx[r]; 1225 ridx = cols_idx[c]; 1226 } 1227 1228 /* Communicate column indices 1229 This could have been done with a single SF but it would have complicated the code a lot. 
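(Each block's column indices must be translated into the global column numbering of the nested matrix; the SF below fetches, for every local nonzero, the matching entry of the block's column IS.)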
1230 But since we do it only once, we pay the price of setting up an SF for each block */ 1231 if (PetscDefined(USE_64BIT_INDICES)) { 1232 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1233 } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */ 1234 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1235 PetscCall(PetscIntCast(mumps->nnz, &innz)); 1236 PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1237 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1238 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1239 PetscCall(PetscSFDestroy(&csf)); 1240 1241 /* Import indices: use direct map for rows and mapped indices for columns */ 1242 if (swap) { 1243 for (PetscInt k = 0; k < mumps->nnz; k++) { 1244 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1245 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1246 } 1247 } else { 1248 for (PetscInt k = 0; k < mumps->nnz; k++) { 1249 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1250 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1251 } 1252 } 1253 1254 /* Import values to full COO */ 1255 if (conjugate) { /* conjugate the entries */ 1256 PetscScalar *v = vals + cumnnz; 1257 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]); 1258 } else if (vscale != 1.0) { 1259 PetscScalar *v = vals + cumnnz; 1260 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k]; 1261 } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1262 1263 /* Shift new starting point and sanity check */ 1264 cumnnz += mumps->nnz; 1265 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 1266 1267 /* Free scratch memory */ 1268 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1269 PetscCall(PetscFree(mumps->val_alloc)); 1270 mumps->val = NULL; 1271 mumps->nnz = 0; 1272 } 1273 } 1274 if (mumps->id.ICNTL(15) == 1) { 1275 if (Mbs != A->rmap->N) { 1276 PetscMPIInt rank, size; 1277 1278 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 1279 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1280 if (rank == 0) { 1281 PetscInt shift = 0; 1282 1283 PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk)); 1284 PetscCall(PetscFree(mumps->id.blkptr)); 1285 PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr)); 1286 mumps->id.blkptr[0] = 1; 1287 for (PetscInt i = 0; i < size; ++i) { 1288 for (PetscInt r = 0; r < nr; r++) { 1289 Mat sub = mats[r][r]; 1290 const PetscInt *ranges; 1291 PetscInt bs; 1292 1293 for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 1294 PetscCall(MatGetOwnershipRanges(sub, &ranges)); 1295 PetscCall(MatGetBlockSize(sub, &bs)); 1296 for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1)); 1297 shift += (ranges[i + 1] - ranges[i]) / bs; 1298 } 1299 } 1300 } 1301 } else mumps->id.ICNTL(15) = 0; 1302 } 1303 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1304 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1305 for 
(PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1306 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1307 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 1308 mumps->nest_vals_start[nr * nc] = cumnnz; 1309 1310 /* Set pointers for final MUMPS data structure */ 1311 mumps->nest_vals = vals; 1312 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1313 mumps->val = vals; 1314 mumps->irn = irns; 1315 mumps->jcn = jcns; 1316 mumps->nnz = cumnnz; 1317 } else { 1318 PetscScalar *oval = mumps->nest_vals; 1319 for (PetscInt r = 0; r < nr; r++) { 1320 for (PetscInt c = 0; c < nc; c++) { 1321 PetscBool conjugate = PETSC_FALSE; 1322 Mat sub = mats[r][c]; 1323 PetscScalar vscale = 1.0, vshift = 0.0; 1324 PetscInt midx = r * nc + c; 1325 1326 if (!mumps->nest_convert_to_triples[midx]) continue; 1327 PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL)); 1328 PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 1329 mumps->val = oval + mumps->nest_vals_start[midx]; 1330 PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1331 if (conjugate) { 1332 PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1333 for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]); 1334 } else if (vscale != 1.0) { 1335 PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1336 for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale; 1337 } 1338 } 1339 } 1340 mumps->val = oval; 1341 } 1342 PetscFunctionReturn(PETSC_SUCCESS); 1343 } 1344 1345 static PetscErrorCode MatDestroy_MUMPS(Mat A) 1346 { 1347 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1348 1349 PetscFunctionBegin; 1350 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1351 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1352 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1353 PetscCall(VecDestroy(&mumps->b_seq)); 1354 PetscCall(VecDestroy(&mumps->x_seq)); 1355 PetscCall(PetscFree(mumps->id.perm_in)); 1356 PetscCall(PetscFree(mumps->id.blkvar)); 1357 PetscCall(PetscFree(mumps->id.blkptr)); 1358 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1359 PetscCall(PetscFree(mumps->val_alloc)); 1360 PetscCall(PetscFree(mumps->info)); 1361 PetscCall(PetscFree(mumps->ICNTL_pre)); 1362 PetscCall(PetscFree(mumps->CNTL_pre)); 1363 PetscCall(MatMumpsResetSchur_Private(mumps)); 1364 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1365 mumps->id.job = JOB_END; 1366 PetscMUMPS_c(mumps); 1367 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1368 if (mumps->mumps_comm != MPI_COMM_NULL) { 1369 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1370 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1371 } 1372 } 1373 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1374 if (mumps->use_petsc_omp_support) { 1375 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1376 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1377 PetscCall(PetscFree3(mumps->rhs_nrow, 
mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor.
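   On return, id.irhs_loc holds the 1-based global row indices of the locally owned RHS rows, id.nloc_rhs/id.lrhs_loc the local row count and
   leading dimension, and id.rhs_loc the local RHS values; with PETSc's OpenMP support the RHS pieces of the non-master ranks of omp_comm are
   first gathered onto the OMP master via MPI_Gatherv.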
*/ 1409 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1410 { 1411 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1412 const PetscMPIInt ompsize = mumps->omp_comm_size; 1413 PetscInt i, m, M, rstart; 1414 1415 PetscFunctionBegin; 1416 PetscCall(MatGetSize(A, &M, NULL)); 1417 PetscCall(MatGetLocalSize(A, &m, NULL)); 1418 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1419 if (ompsize == 1) { 1420 if (!mumps->irhs_loc) { 1421 mumps->nloc_rhs = (PetscMUMPSInt)m; 1422 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1423 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1424 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */ 1425 } 1426 mumps->id.rhs_loc = (MumpsScalar *)array; 1427 } else { 1428 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1429 const PetscInt *ranges; 1430 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1431 MPI_Group petsc_group, omp_group; 1432 PetscScalar *recvbuf = NULL; 1433 1434 if (mumps->is_omp_master) { 1435 /* Lazily initialize the omp stuff for distributed rhs */ 1436 if (!mumps->irhs_loc) { 1437 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1438 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1439 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1440 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1441 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1442 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1443 1444 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1445 mumps->nloc_rhs = 0; 1446 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1447 for (j = 0; j < ompsize; j++) { 1448 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1449 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1450 } 1451 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1452 for (j = k = 0; j < ompsize; j++) { 1453 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1454 } 1455 1456 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1457 PetscCallMPI(MPI_Group_free(&petsc_group)); 1458 PetscCallMPI(MPI_Group_free(&omp_group)); 1459 } 1460 1461 /* Realloc buffers when current nrhs is bigger than what we have met */ 1462 if (nrhs > mumps->max_nrhs) { 1463 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1464 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1465 mumps->max_nrhs = nrhs; 1466 } 1467 1468 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1469 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1470 mumps->rhs_disps[0] = 0; 1471 for (j = 1; j < ompsize; j++) { 1472 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1473 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1474 } 1475 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. 
Single rhs is common in Ax=b */ 1476 } 1477 1478 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1479 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1480 1481 if (mumps->is_omp_master) { 1482 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1483 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1484 for (j = 0; j < ompsize; j++) { 1485 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1486 dst = dstbase; 1487 for (i = 0; i < nrhs; i++) { 1488 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1489 src += mumps->rhs_nrow[j]; 1490 dst += mumps->nloc_rhs; 1491 } 1492 dstbase += mumps->rhs_nrow[j]; 1493 } 1494 } 1495 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1496 } 1497 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1498 } 1499 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 1500 mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs; 1501 mumps->id.lrhs_loc = mumps->nloc_rhs; 1502 mumps->id.irhs_loc = mumps->irhs_loc; 1503 PetscFunctionReturn(PETSC_SUCCESS); 1504 } 1505 1506 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1507 { 1508 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1509 const PetscScalar *rarray = NULL; 1510 PetscScalar *array; 1511 IS is_iden, is_petsc; 1512 PetscInt i; 1513 PetscBool second_solve = PETSC_FALSE; 1514 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1515 1516 PetscFunctionBegin; 1517 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1518 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1519 &cite1)); 1520 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1521 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1522 &cite2)); 1523 1524 PetscCall(VecFlag(x, A->factorerrortype)); 1525 if (A->factorerrortype) { 1526 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1527 PetscFunctionReturn(PETSC_SUCCESS); 1528 } 1529 1530 mumps->id.nrhs = 1; 1531 if (mumps->petsc_size > 1) { 1532 if (mumps->ICNTL20 == 10) { 1533 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1534 PetscCall(VecGetArrayRead(b, &rarray)); 1535 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1536 } else { 1537 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1538 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1539 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1540 if (!mumps->myid) { 1541 PetscCall(VecGetArray(mumps->b_seq, &array)); 1542 mumps->id.rhs = (MumpsScalar *)array; 1543 } 1544 } 1545 } else { /* petsc_size == 1 */ 1546 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1547 PetscCall(VecCopy(b, x)); 1548 PetscCall(VecGetArray(x, &array)); 1549 mumps->id.rhs = (MumpsScalar *)array; 1550 } 1551 1552 /* 1553 handle condensation step of Schur complement (if any) 1554 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1555 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1556 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 
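(As a sketch of that default path in the code below: second_solve is set, MatMumpsHandleSchur_Private(A, PETSC_FALSE) is called and ICNTL(26) is switched to 1 so that the JOB_SOLVE call performs the condensation step, and MatMumpsHandleSchur_Private(A, PETSC_TRUE) afterwards performs the expansion step that recovers the full solution.)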
1557 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1558 */ 1559 if (mumps->id.size_schur > 0) { 1560 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1561 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1562 second_solve = PETSC_TRUE; 1563 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1564 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1565 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1566 } 1567 /* solve phase */ 1568 mumps->id.job = JOB_SOLVE; 1569 PetscMUMPS_c(mumps); 1570 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1571 1572 /* handle expansion step of Schur complement (if any) */ 1573 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1574 else if (mumps->id.ICNTL(26) == 1) { 1575 PetscCall(MatMumpsSolveSchur_Private(A)); 1576 for (i = 0; i < mumps->id.size_schur; ++i) { 1577 #if !defined(PETSC_USE_COMPLEX) 1578 PetscScalar val = mumps->id.redrhs[i]; 1579 #else 1580 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1581 #endif 1582 array[mumps->id.listvar_schur[i] - 1] = val; 1583 } 1584 } 1585 1586 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */ 1587 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1588 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1589 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1590 } 1591 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1592 PetscInt *isol2_loc = NULL; 1593 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1594 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1595 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1596 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1597 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1598 PetscCall(ISDestroy(&is_iden)); 1599 PetscCall(ISDestroy(&is_petsc)); 1600 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1601 } 1602 1603 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1604 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1605 } 1606 1607 if (mumps->petsc_size > 1) { 1608 if (mumps->ICNTL20 == 10) { 1609 PetscCall(VecRestoreArrayRead(b, &rarray)); 1610 } else if (!mumps->myid) { 1611 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1612 } 1613 } else PetscCall(VecRestoreArray(x, &array)); 1614 1615 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1616 PetscFunctionReturn(PETSC_SUCCESS); 1617 } 1618 1619 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1620 { 1621 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1622 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1623 1624 PetscFunctionBegin; 1625 mumps->id.ICNTL(9) = 0; 1626 PetscCall(MatSolve_MUMPS(A, b, x)); 1627 mumps->id.ICNTL(9) = value; 1628 PetscFunctionReturn(PETSC_SUCCESS); 1629 } 1630 1631 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1632 { 1633 Mat Bt = NULL; 1634 PetscBool denseX, denseB, flg, flgT; 1635 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1636 PetscInt i, nrhs, M, nrhsM; 1637 PetscScalar *array; 1638 const PetscScalar *rbray; 1639 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1640 PetscMUMPSInt *isol_loc, *isol_loc_save; 1641 PetscScalar *bray, *sol_loc, *sol_loc_save; 1642 IS is_to, is_from; 1643 PetscInt k, proc, j, m, myrstart; 1644 const PetscInt *rstart; 1645 Vec v_mpi, msol_loc; 1646 VecScatter scat_sol; 1647 Vec b_seq; 1648 VecScatter scat_rhs; 1649 PetscScalar *aa; 1650 PetscInt spnr, *ia, *ja; 1651 Mat_MPIAIJ *b = NULL; 1652 1653 PetscFunctionBegin; 1654 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1655 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1656 1657 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1658 if (denseB) { 1659 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1660 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1661 } else { /* sparse B */ 1662 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1663 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1664 PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1665 PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 1666 /* input B is transpose of actual RHS matrix, 1667 because mumps requires sparse compressed COLUMN storage! 
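For example, a user's AIJ right-hand side matrix is wrapped with MatCreateTranspose() into a MATTRANSPOSEVIRTUAL B, so that the CSR rows of the underlying Bt act as the compressed columns MUMPS reads through irhs_ptr/irhs_sparse.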
See MatMatTransposeSolve_MUMPS() */ 1668 PetscCall(MatTransposeGetMat(B, &Bt)); 1669 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1670 } 1671 1672 PetscCall(MatGetSize(B, &M, &nrhs)); 1673 PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); 1674 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 1675 mumps->id.lrhs = (PetscMUMPSInt)M; 1676 mumps->id.rhs = NULL; 1677 1678 if (mumps->petsc_size == 1) { 1679 PetscScalar *aa; 1680 PetscInt spnr, *ia, *ja; 1681 PetscBool second_solve = PETSC_FALSE; 1682 1683 PetscCall(MatDenseGetArray(X, &array)); 1684 mumps->id.rhs = (MumpsScalar *)array; 1685 1686 if (denseB) { 1687 /* copy B to X */ 1688 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1689 PetscCall(PetscArraycpy(array, rbray, nrhsM)); 1690 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1691 } else { /* sparse B */ 1692 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1693 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1694 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1695 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1696 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1697 } 1698 /* handle condensation step of Schur complement (if any) */ 1699 if (mumps->id.size_schur > 0) { 1700 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1701 second_solve = PETSC_TRUE; 1702 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1703 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1704 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1705 } 1706 /* solve phase */ 1707 mumps->id.job = JOB_SOLVE; 1708 PetscMUMPS_c(mumps); 1709 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1710 1711 /* handle expansion step of Schur complement (if any) */ 1712 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1713 else if (mumps->id.ICNTL(26) == 1) { 1714 PetscCall(MatMumpsSolveSchur_Private(A)); 1715 for (j = 0; j < nrhs; ++j) 1716 for (i = 0; i < mumps->id.size_schur; ++i) { 1717 #if !defined(PETSC_USE_COMPLEX) 1718 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1719 #else 1720 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1721 #endif 1722 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1723 } 1724 } 1725 if (!denseB) { /* sparse B */ 1726 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1727 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1728 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1729 } 1730 PetscCall(MatDenseRestoreArray(X, &array)); 1731 PetscFunctionReturn(PETSC_SUCCESS); 1732 } 1733 1734 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
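(This concerns the centralized path with ICNTL(20)=0; when mumps->ICNTL20 == 10 the dense right-hand sides are instead passed to MUMPS in distributed form, see the first branch below.)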
*/ 1735 PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1736 1737 /* create msol_loc to hold mumps local solution */ 1738 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1739 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1740 1741 lsol_loc = mumps->id.lsol_loc; 1742 PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */ 1743 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1744 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1745 mumps->id.isol_loc = isol_loc; 1746 1747 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1748 1749 if (denseB) { 1750 if (mumps->ICNTL20 == 10) { 1751 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1752 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1753 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1754 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1755 PetscCall(MatGetLocalSize(B, &m, NULL)); 1756 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); 1757 } else { 1758 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1759 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1760 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1761 0, re-arrange B into desired order, which is a local operation. 1762 */ 1763 1764 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1765 /* wrap dense rhs matrix B into a vector v_mpi */ 1766 PetscCall(MatGetLocalSize(B, &m, NULL)); 1767 PetscCall(MatDenseGetArray(B, &bray)); 1768 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, (const PetscScalar *)bray, &v_mpi)); 1769 PetscCall(MatDenseRestoreArray(B, &bray)); 1770 1771 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
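As an illustration of the index map built below: the entry of B in global row i and column j owned by process proc must land at position j*M + i of the centralized rhs, so idx[] lists these destinations in the order the entries appear in v_mpi, process block by process block and column by column within each block.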
*/ 1772 if (!mumps->myid) { 1773 PetscInt *idx; 1774 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1775 PetscCall(PetscMalloc1(nrhsM, &idx)); 1776 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1777 for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) { 1778 for (j = 0; j < nrhs; j++) { 1779 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1780 } 1781 } 1782 1783 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq)); 1784 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to)); 1785 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from)); 1786 } else { 1787 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1788 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1789 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1790 } 1791 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1792 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1793 PetscCall(ISDestroy(&is_to)); 1794 PetscCall(ISDestroy(&is_from)); 1795 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1796 1797 if (!mumps->myid) { /* define rhs on the host */ 1798 PetscCall(VecGetArray(b_seq, &bray)); 1799 mumps->id.rhs = (MumpsScalar *)bray; 1800 PetscCall(VecRestoreArray(b_seq, &bray)); 1801 } 1802 } 1803 } else { /* sparse B */ 1804 b = (Mat_MPIAIJ *)Bt->data; 1805 1806 /* wrap dense X into a vector v_mpi */ 1807 PetscCall(MatGetLocalSize(X, &m, NULL)); 1808 PetscCall(MatDenseGetArray(X, &bray)); 1809 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, (const PetscScalar *)bray, &v_mpi)); 1810 PetscCall(MatDenseRestoreArray(X, &bray)); 1811 1812 if (!mumps->myid) { 1813 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1814 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1815 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1816 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1817 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1818 } else { 1819 mumps->id.irhs_ptr = NULL; 1820 mumps->id.irhs_sparse = NULL; 1821 mumps->id.nz_rhs = 0; 1822 mumps->id.rhs_sparse = NULL; 1823 } 1824 } 1825 1826 /* solve phase */ 1827 mumps->id.job = JOB_SOLVE; 1828 PetscMUMPS_c(mumps); 1829 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1830 1831 /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */ 1832 PetscCall(MatDenseGetArray(X, &array)); 1833 PetscCall(VecPlaceArray(v_mpi, array)); 1834 1835 /* create scatter scat_sol */ 1836 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1837 /* iidx: index for scatter mumps solution to PETSc X */ 1838 1839 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1840 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1841 for (i = 0; i < lsol_loc; i++) { 1842 isol_loc[i] -= 1; /* change Fortran style to C style. 
sol_loc[i+j*lsol_loc] contains x[isol_loc[i]] of the j-th rhs */ 1843 1844 for (proc = 0; proc < mumps->petsc_size; proc++) { 1845 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1846 myrstart = rstart[proc]; 1847 k = isol_loc[i] - myrstart; /* local index on 1st column of PETSc vector X */ 1848 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to PETSc index in X */ 1849 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1850 break; 1851 } 1852 } 1853 1854 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1855 } 1856 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1857 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1858 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1859 PetscCall(ISDestroy(&is_from)); 1860 PetscCall(ISDestroy(&is_to)); 1861 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1862 PetscCall(MatDenseRestoreArray(X, &array)); 1863 1864 /* free spaces */ 1865 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1866 mumps->id.isol_loc = isol_loc_save; 1867 1868 PetscCall(PetscFree2(sol_loc, isol_loc)); 1869 PetscCall(PetscFree(idxx)); 1870 PetscCall(VecDestroy(&msol_loc)); 1871 PetscCall(VecDestroy(&v_mpi)); 1872 if (!denseB) { 1873 if (!mumps->myid) { 1874 b = (Mat_MPIAIJ *)Bt->data; 1875 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1876 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1877 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1878 } 1879 } else { 1880 if (mumps->ICNTL20 == 0) { 1881 PetscCall(VecDestroy(&b_seq)); 1882 PetscCall(VecScatterDestroy(&scat_rhs)); 1883 } 1884 } 1885 PetscCall(VecScatterDestroy(&scat_sol)); 1886 PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ?
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1887 PetscFunctionReturn(PETSC_SUCCESS); 1888 } 1889 1890 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1891 { 1892 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1893 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1894 1895 PetscFunctionBegin; 1896 mumps->id.ICNTL(9) = 0; 1897 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1898 mumps->id.ICNTL(9) = value; 1899 PetscFunctionReturn(PETSC_SUCCESS); 1900 } 1901 1902 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1903 { 1904 PetscBool flg; 1905 Mat B; 1906 1907 PetscFunctionBegin; 1908 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1909 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1910 1911 /* Create B=Bt^T that uses Bt's data structure */ 1912 PetscCall(MatCreateTranspose(Bt, &B)); 1913 1914 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1915 PetscCall(MatDestroy(&B)); 1916 PetscFunctionReturn(PETSC_SUCCESS); 1917 } 1918 1919 #if !defined(PETSC_USE_COMPLEX) 1920 /* 1921 input: 1922 F: numeric factor 1923 output: 1924 nneg: total number of negative pivots 1925 nzero: total number of zero pivots 1926 npos: (global dimension of F) - nneg - nzero 1927 */ 1928 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1929 { 1930 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1931 PetscMPIInt size; 1932 1933 PetscFunctionBegin; 1934 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1935 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1936 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13)); 1937 1938 if (nneg) *nneg = mumps->id.INFOG(12); 1939 if (nzero || npos) { 1940 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1941 if (nzero) *nzero = mumps->id.INFOG(28); 1942 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1943 } 1944 PetscFunctionReturn(PETSC_SUCCESS); 1945 } 1946 #endif 1947 1948 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1949 { 1950 PetscMPIInt nreqs; 1951 PetscMUMPSInt *irn, *jcn; 1952 PetscMPIInt count; 1953 PetscCount totnnz, remain; 1954 const PetscInt osize = mumps->omp_comm_size; 1955 PetscScalar *val; 1956 1957 PetscFunctionBegin; 1958 if (osize > 1) { 1959 if (reuse == MAT_INITIAL_MATRIX) { 1960 /* master first gathers counts of nonzeros to receive */ 1961 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1962 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1963 1964 /* Then each computes number of send/recvs */ 1965 if (mumps->is_omp_master) { 1966 /* Start from 1 since self communication is not done in MPI */ 1967 nreqs = 0; 1968 for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1969 } else { 1970 nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX)); 1971 } 1972 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1973 1974 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1975 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1976 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1977 is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
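As a hypothetical example, if recvcount[i] were 3*PETSC_MPI_INT_MAX, the loop below would post three receives of PETSC_MPI_INT_MAX entries each, separately for irn, jcn and val, from omp rank i.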
1978 */ 1979 nreqs = 0; /* counter for actual send/recvs */ 1980 if (mumps->is_omp_master) { 1981 totnnz = 0; 1982 1983 for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1984 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1985 PetscCall(PetscMalloc1(totnnz, &val)); 1986 1987 /* Self communication */ 1988 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1989 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1990 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1991 1992 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1993 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1994 PetscCall(PetscFree(mumps->val_alloc)); 1995 mumps->nnz = totnnz; 1996 mumps->irn = irn; 1997 mumps->jcn = jcn; 1998 mumps->val = mumps->val_alloc = val; 1999 2000 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 2001 jcn += mumps->recvcount[0]; 2002 val += mumps->recvcount[0]; 2003 2004 /* Remote communication */ 2005 for (PetscMPIInt i = 1; i < osize; i++) { 2006 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2007 remain = mumps->recvcount[i] - count; 2008 while (count > 0) { 2009 PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2010 PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2011 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2012 irn += count; 2013 jcn += count; 2014 val += count; 2015 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2016 remain -= count; 2017 } 2018 } 2019 } else { 2020 irn = mumps->irn; 2021 jcn = mumps->jcn; 2022 val = mumps->val; 2023 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2024 remain = mumps->nnz - count; 2025 while (count > 0) { 2026 PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2027 PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2028 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2029 irn += count; 2030 jcn += count; 2031 val += count; 2032 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2033 remain -= count; 2034 } 2035 } 2036 } else { 2037 nreqs = 0; 2038 if (mumps->is_omp_master) { 2039 val = mumps->val + mumps->recvcount[0]; 2040 for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 2041 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2042 remain = mumps->recvcount[i] - count; 2043 while (count > 0) { 2044 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2045 val += count; 2046 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2047 remain -= count; 2048 } 2049 } 2050 } else { 2051 val = mumps->val; 2052 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2053 remain = mumps->nnz - count; 2054 while (count > 0) { 2055 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2056 val += count; 2057 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2058 remain -= count; 2059 } 2060 } 2061 } 2062 
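/* Wait for all sliced sends/receives posted above to complete before the gathered irn/jcn/val triplets are used */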
PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 2063 mumps->tag++; /* It is totally fine for the above send/recvs to share one MPI tag */ 2064 } 2065 PetscFunctionReturn(PETSC_SUCCESS); 2066 } 2067 2068 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info) 2069 { 2070 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2071 PetscBool isMPIAIJ; 2072 2073 PetscFunctionBegin; 2074 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* skip if analysis failed, unless it only flagged an empty matrix (INFOG(1)=-16 with INFO(2)=0), cf. MatFactorSymbolic_MUMPS_ReportIfError() */ 2075 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2076 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2077 PetscFunctionReturn(PETSC_SUCCESS); 2078 } 2079 2080 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 2081 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2082 2083 /* numerical factorization phase */ 2084 mumps->id.job = JOB_FACTNUMERIC; 2085 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2086 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 2087 } else { 2088 mumps->id.a_loc = (MumpsScalar *)mumps->val; 2089 } 2090 PetscMUMPS_c(mumps); 2091 if (mumps->id.INFOG(1) < 0) { 2092 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2093 if (mumps->id.INFOG(1) == -10) { 2094 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2095 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2096 } else if (mumps->id.INFOG(1) == -13) { 2097 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2098 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2099 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2100 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2101 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2102 } else { 2103 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2104 F->factorerrortype = MAT_FACTOR_OTHER; 2105 } 2106 } 2107 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 2108 2109 F->assembled = PETSC_TRUE; 2110 2111 if (F->schur) { /* reset Schur status to unfactored */ 2112 #if defined(PETSC_HAVE_CUDA) 2113 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 2114 #endif 2115 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2116 mumps->id.ICNTL(19) = 2; 2117 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2118 } 2119 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2120 } 2121 2122 /* just to be sure that ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */ 2123 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2124 2125 if (!mumps->is_omp_master)
mumps->id.INFO(23) = 0; 2126 if (mumps->petsc_size > 1) { 2127 PetscInt lsol_loc; 2128 PetscScalar *sol_loc; 2129 2130 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 2131 2132 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2133 if (mumps->x_seq) { 2134 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2135 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2136 PetscCall(VecDestroy(&mumps->x_seq)); 2137 } 2138 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2139 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2140 mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc; 2141 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2142 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2143 } 2144 PetscCall(PetscLogFlops((double)mumps->id.RINFO(2))); 2145 PetscFunctionReturn(PETSC_SUCCESS); 2146 } 2147 2148 /* Sets MUMPS options from the options database */ 2149 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2150 { 2151 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2152 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2153 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2154 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2155 MumpsScalar *arr; 2156 2157 PetscFunctionBegin; 2158 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2159 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2160 PetscInt nthreads = 0; 2161 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2162 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2163 PetscMUMPSInt nblk, *blkvar, *blkptr; 2164 2165 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2166 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2167 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2168 2169 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2170 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2171 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2172 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2173 if (mumps->use_petsc_omp_support) { 2174 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2175 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2176 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2177 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2178 #else 2179 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2180 ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 2181 #endif 2182 } else { 2183 mumps->omp_comm = PETSC_COMM_SELF; 2184 mumps->mumps_comm = mumps->petsc_comm; 2185 mumps->is_omp_master = PETSC_TRUE; 2186 } 2187 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2188 mumps->reqs = NULL; 2189 mumps->tag = 0; 2190 2191 if (mumps->mumps_comm != MPI_COMM_NULL) { 2192 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2193 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2194 MPI_Comm comm; 2195 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2196 mumps->mumps_comm = comm; 2197 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2198 } 2199 2200 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2201 mumps->id.job = JOB_INIT; 2202 mumps->id.par = 1; /* host participates factorizaton and solve */ 2203 mumps->id.sym = mumps->sym; 2204 2205 size = mumps->id.size_schur; 2206 arr = mumps->id.schur; 2207 listvar_schur = mumps->id.listvar_schur; 2208 nblk = mumps->id.nblk; 2209 blkvar = mumps->id.blkvar; 2210 blkptr = mumps->id.blkptr; 2211 PetscMUMPS_c(mumps); 2212 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2213 2214 /* set PETSc-MUMPS default options - override MUMPS default */ 2215 mumps->id.ICNTL(3) = 0; 2216 mumps->id.ICNTL(4) = 0; 2217 if (mumps->petsc_size == 1) { 2218 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2219 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2220 } else { 2221 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2222 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2223 } 2224 if (nblk && blkptr) { 2225 mumps->id.ICNTL(15) = 1; 2226 mumps->id.nblk = nblk; 2227 mumps->id.blkvar = blkvar; 2228 mumps->id.blkptr = blkptr; 2229 } 2230 2231 /* restore cached ICNTL and CNTL values */ 2232 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2233 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2234 PetscCall(PetscFree(mumps->ICNTL_pre)); 2235 PetscCall(PetscFree(mumps->CNTL_pre)); 2236 2237 if (schur) { 2238 mumps->id.size_schur = size; 2239 mumps->id.schur_lld = size; 2240 mumps->id.schur = arr; 2241 mumps->id.listvar_schur = listvar_schur; 2242 if (mumps->petsc_size > 1) { 2243 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2244 2245 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2246 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2247 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2248 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2249 } else { 2250 if (F->factortype == MAT_FACTOR_LU) { 2251 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2252 } else { 2253 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2254 } 2255 } 2256 mumps->id.ICNTL(26) = -1; 2257 } 2258 2259 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 
2260 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 2261 */ 2262 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2263 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2264 2265 mumps->scat_rhs = NULL; 2266 mumps->scat_sol = NULL; 2267 } 2268 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2269 if (flg) mumps->id.ICNTL(1) = icntl; 2270 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2271 if (flg) mumps->id.ICNTL(2) = icntl; 2272 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2273 if (flg) mumps->id.ICNTL(3) = icntl; 2274 2275 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2276 if (flg) mumps->id.ICNTL(4) = icntl; 2277 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2278 2279 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2280 if (flg) mumps->id.ICNTL(6) = icntl; 2281 2282 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2283 if (flg) { 2284 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2285 mumps->id.ICNTL(7) = icntl; 2286 } 2287 2288 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2289 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2290 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2291 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2292 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2293 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2294 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2295 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2296 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs; 2297 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), 
&mumps->id.ICNTL(15), &flg)); 2298 if (flg) { 2299 if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2300 else if (mumps->id.ICNTL(15) > 0) { 2301 const PetscInt *bsizes; 2302 PetscInt nblocks, p, *blkptr = NULL; 2303 PetscMPIInt *recvcounts, *displs, n; 2304 PetscMPIInt rank, size = 0; 2305 2306 PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes)); 2307 flg = PETSC_TRUE; 2308 for (p = 0; p < nblocks; ++p) { 2309 if (bsizes[p] > 1) break; 2310 } 2311 if (p == nblocks) flg = PETSC_FALSE; 2312 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 2313 if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1 2314 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2315 if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2316 PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs)); 2317 PetscCall(PetscMPIIntCast(nblocks, &n)); 2318 PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); 2319 for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p]; 2320 PetscCall(PetscMalloc1(displs[size] + 1, &blkptr)); 2321 PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2322 PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2323 if (rank == 0) { 2324 blkptr[0] = 1; 2325 for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p]; 2326 PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr)); 2327 } 2328 PetscCall(PetscFree2(recvcounts, displs)); 2329 PetscCall(PetscFree(blkptr)); 2330 } 2331 } 2332 } 2333 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2334 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2335 PetscCall(MatDestroy(&F->schur)); 2336 PetscCall(MatMumpsResetSchur_Private(mumps)); 2337 } 2338 2339 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2340 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2341 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2342 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2343 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 
2344 In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 2345 */ 2346 mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */ 2347 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI) 2348 mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */ 2349 #endif 2350 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2351 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2352 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2353 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2354 #endif 2355 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2356 2357 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2358 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2359 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2360 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2361 2362 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2363 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2364 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2365 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2366 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2367 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2368 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2369 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not 
supported by PETSc API */ 2370 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2371 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2372 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2373 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL)); 2374 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2375 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL)); 2376 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL)); 2377 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2378 2379 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2380 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2381 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2382 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2383 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2384 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2385 2386 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2387 2388 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2389 if (ninfo) { 2390 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2391 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2392 mumps->ninfo = ninfo; 2393 for (i = 0; i < ninfo; i++) { 2394 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2395 mumps->info[i] = info[i]; 2396 } 2397 } 2398 PetscOptionsEnd(); 2399 PetscFunctionReturn(PETSC_SUCCESS); 2400 } 2401 2402 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2403 { 2404 PetscFunctionBegin; 2405 if (mumps->id.INFOG(1) < 0) { 2406 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2407 if (mumps->id.INFOG(1) == -6) { 2408 PetscCall(PetscInfo(F, "MUMPS 
error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2409 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2410 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2411 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2412 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2413 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { 2414 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2415 } else { 2416 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2417 F->factorerrortype = MAT_FACTOR_OTHER; 2418 } 2419 } 2420 if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 2421 PetscFunctionReturn(PETSC_SUCCESS); 2422 } 2423 2424 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2425 { 2426 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2427 Vec b; 2428 const PetscInt M = A->rmap->N; 2429 2430 PetscFunctionBegin; 2431 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2432 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2433 PetscFunctionReturn(PETSC_SUCCESS); 2434 } 2435 2436 /* Set MUMPS options from the options database */ 2437 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2438 2439 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2440 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2441 2442 /* analysis phase */ 2443 mumps->id.job = JOB_FACTSYMBOLIC; 2444 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2445 switch (mumps->id.ICNTL(18)) { 2446 case 0: /* centralized assembled matrix input */ 2447 if (!mumps->myid) { 2448 mumps->id.nnz = mumps->nnz; 2449 mumps->id.irn = mumps->irn; 2450 mumps->id.jcn = mumps->jcn; 2451 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2452 if (r && mumps->id.ICNTL(7) == 7) { 2453 mumps->id.ICNTL(7) = 1; 2454 if (!mumps->myid) { 2455 const PetscInt *idx; 2456 PetscInt i; 2457 2458 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2459 PetscCall(ISGetIndices(r, &idx)); 2460 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */ 2461 PetscCall(ISRestoreIndices(r, &idx)); 2462 } 2463 } 2464 } 2465 break; 2466 case 3: /* distributed assembled matrix input (size>1) */ 2467 mumps->id.nnz_loc = mumps->nnz; 2468 mumps->id.irn_loc = mumps->irn; 2469 mumps->id.jcn_loc = mumps->jcn; 2470 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2471 if (mumps->ICNTL20 == 0) { /* Centralized rhs.
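(MUMPS needs the dense rhs centralized on the host when ICNTL(20)=0, hence the VecScatterCreateToZero() below, whose scatter is later used in MatSolve_MUMPS to gather the distributed b into b_seq on rank 0.)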
Create scatter scat_rhs for repeated use in MatSolve() */ 2472 PetscCall(MatCreateVecs(A, NULL, &b)); 2473 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2474 PetscCall(VecDestroy(&b)); 2475 } 2476 break; 2477 } 2478 PetscMUMPS_c(mumps); 2479 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2480 2481 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2482 F->ops->solve = MatSolve_MUMPS; 2483 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2484 F->ops->matsolve = MatMatSolve_MUMPS; 2485 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2486 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2487 2488 mumps->matstruc = SAME_NONZERO_PATTERN; 2489 PetscFunctionReturn(PETSC_SUCCESS); 2490 } 2491 2492 /* Note the PETSc r and c permutations are ignored */ 2493 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2494 { 2495 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2496 Vec b; 2497 const PetscInt M = A->rmap->N; 2498 2499 PetscFunctionBegin; 2500 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2501 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2502 PetscFunctionReturn(PETSC_SUCCESS); 2503 } 2504 2505 /* Set MUMPS options from the options database */ 2506 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2507 2508 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2509 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2510 2511 /* analysis phase */ 2512 mumps->id.job = JOB_FACTSYMBOLIC; 2513 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2514 switch (mumps->id.ICNTL(18)) { 2515 case 0: /* centralized assembled matrix input */ 2516 if (!mumps->myid) { 2517 mumps->id.nnz = mumps->nnz; 2518 mumps->id.irn = mumps->irn; 2519 mumps->id.jcn = mumps->jcn; 2520 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2521 } 2522 break; 2523 case 3: /* distributed assembled matrix input (size>1) */ 2524 mumps->id.nnz_loc = mumps->nnz; 2525 mumps->id.irn_loc = mumps->irn; 2526 mumps->id.jcn_loc = mumps->jcn; 2527 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2528 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2529 PetscCall(MatCreateVecs(A, NULL, &b)); 2530 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2531 PetscCall(VecDestroy(&b)); 2532 } 2533 break; 2534 } 2535 PetscMUMPS_c(mumps); 2536 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2537 2538 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2539 F->ops->solve = MatSolve_MUMPS; 2540 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2541 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2542 2543 mumps->matstruc = SAME_NONZERO_PATTERN; 2544 PetscFunctionReturn(PETSC_SUCCESS); 2545 } 2546 2547 /* Note the PETSc r permutation and factor info are ignored */ 2548 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info) 2549 { 2550 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2551 Vec b; 2552 const PetscInt M = A->rmap->N; 2553 2554 PetscFunctionBegin; 2555 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2556 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2557 PetscFunctionReturn(PETSC_SUCCESS); 2558 } 2559 2560 /* Set MUMPS options from the options database */ 2561 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2562 2563 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2564 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2565 2566 /* analysis phase */ 2567 mumps->id.job = JOB_FACTSYMBOLIC; 2568 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2569 switch (mumps->id.ICNTL(18)) { 2570 case 0: /* centralized assembled matrix input */ 2571 if (!mumps->myid) { 2572 mumps->id.nnz = mumps->nnz; 2573 mumps->id.irn = mumps->irn; 2574 mumps->id.jcn = mumps->jcn; 2575 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2576 } 2577 break; 2578 case 3: /* distributed assembled matrix input (size>1) */ 2579 mumps->id.nnz_loc = mumps->nnz; 2580 mumps->id.irn_loc = mumps->irn; 2581 mumps->id.jcn_loc = mumps->jcn; 2582 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2583 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2584 PetscCall(MatCreateVecs(A, NULL, &b)); 2585 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2586 PetscCall(VecDestroy(&b)); 2587 } 2588 break; 2589 } 2590 PetscMUMPS_c(mumps); 2591 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2592 2593 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2594 F->ops->solve = MatSolve_MUMPS; 2595 F->ops->solvetranspose = MatSolve_MUMPS; 2596 F->ops->matsolve = MatMatSolve_MUMPS; 2597 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2598 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2599 #if defined(PETSC_USE_COMPLEX) 2600 F->ops->getinertia = NULL; 2601 #else 2602 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2603 #endif 2604 2605 mumps->matstruc = SAME_NONZERO_PATTERN; 2606 PetscFunctionReturn(PETSC_SUCCESS); 2607 } 2608 2609 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2610 { 2611 PetscBool iascii; 2612 PetscViewerFormat format; 2613 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2614 2615 PetscFunctionBegin; 2616 /* check if matrix is mumps type */ 2617 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2618 2619 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2620 if (iascii) { 2621 PetscCall(PetscViewerGetFormat(viewer, &format)); 2622 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2623 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2624 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2625 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2626 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2627 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2628 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2629 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2630 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2631 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2632 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2633 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2634 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2635 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2636 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2637 if (mumps->id.ICNTL(11) > 0) { 2638 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)mumps->id.RINFOG(4))); 2639 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)mumps->id.RINFOG(5))); 2640 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)mumps->id.RINFOG(6))); 2641 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)mumps->id.RINFOG(7), (double)mumps->id.RINFOG(8))); 2642 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", 
(double)mumps->id.RINFOG(9))); 2643 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)mumps->id.RINFOG(10), (double)mumps->id.RINFOG(11))); 2644 } 2645 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2646 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2647 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2648 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2649 /* ICNTL(16), ICNTL(17) not used */ 2650 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2651 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2652 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2653 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2654 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2655 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2656 2657 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2658 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2659 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2660 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2661 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2662 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2663 2664 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2665 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors are discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2666 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2667 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2668 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2669 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37))); 2670 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2671 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48))); 2672 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56))); 2673 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2674 2675 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n",
(double)mumps->id.CNTL(1))); 2676 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)mumps->id.CNTL(2))); 2677 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)mumps->id.CNTL(3))); 2678 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)mumps->id.CNTL(4))); 2679 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)mumps->id.CNTL(5))); 2680 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)mumps->id.CNTL(7))); 2681 2682 /* information local to each processor */ 2683 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2684 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2685 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(1))); 2686 PetscCall(PetscViewerFlush(viewer)); 2687 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2688 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(2))); 2689 PetscCall(PetscViewerFlush(viewer)); 2690 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2691 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(3))); 2692 PetscCall(PetscViewerFlush(viewer)); 2693 2694 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n")); 2695 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2696 PetscCall(PetscViewerFlush(viewer)); 2697 2698 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n")); 2699 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2700 PetscCall(PetscViewerFlush(viewer)); 2701 2702 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2703 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2704 PetscCall(PetscViewerFlush(viewer)); 2705 2706 if (mumps->ninfo && mumps->ninfo <= 80) { 2707 PetscInt i; 2708 for (i = 0; i < mumps->ninfo; i++) { 2709 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2710 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2711 PetscCall(PetscViewerFlush(viewer)); 2712 } 2713 } 2714 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2715 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ?
((PetscObject)A)->prefix : "")); 2716 2717 if (mumps->myid == 0) { /* information from the host */ 2718 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)mumps->id.RINFOG(1))); 2719 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)mumps->id.RINFOG(2))); 2720 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)mumps->id.RINFOG(3))); 2721 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)mumps->id.RINFOG(12), (double)mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2722 2723 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2724 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2725 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2726 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2727 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2728 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2729 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2730 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2731 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2732 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2733 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2734 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14))); 2735 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2736 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2737 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2738 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2739 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2740 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the
factors): %d\n", mumps->id.INFOG(20))); 2741 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2742 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2743 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2744 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2745 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2746 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2747 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2748 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2749 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2750 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2751 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2752 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2753 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2754 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2755 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2756 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2757 } 2758 } 2759 } 2760 PetscFunctionReturn(PETSC_SUCCESS); 2761 } 2762 2763 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info) 2764 { 2765 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2766 2767 PetscFunctionBegin; 2768 info->block_size = 1.0; 2769 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2770 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2771 info->nz_unneeded = 0.0; 2772 info->assemblies = 0.0; 2773 info->mallocs = 0.0; 2774 info->memory = 0.0; 2775 info->fill_ratio_given = 0; 2776 info->fill_ratio_needed = 0; 2777 info->factor_mallocs = 0; 2778 PetscFunctionReturn(PETSC_SUCCESS); 2779 } 2780 2781 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2782 { 2783 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2784 const PetscScalar *arr; 2785 const PetscInt *idxs; 2786 PetscInt size, i; 2787 2788 PetscFunctionBegin; 2789 PetscCall(ISGetLocalSize(is, &size)); 2790 /* Schur complement matrix */ 2791 PetscCall(MatDestroy(&F->schur)); 2792 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2793 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2794 mumps->id.schur = (MumpsScalar *)arr; 2795 PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur)); 2796 PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld)); 2797 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2798 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2799 2800 /* MUMPS expects Fortran-style indices */ 2801 PetscCall(PetscFree(mumps->id.listvar_schur)); 2802 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2803 PetscCall(ISGetIndices(is, &idxs)); 2804 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i])); 2805 PetscCall(ISRestoreIndices(is, &idxs)); 2806 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 2807 mumps->id.ICNTL(26) = -1; 2808 PetscFunctionReturn(PETSC_SUCCESS); 2809 } 2810 2811 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2812 { 2813 Mat St; 2814 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2815 PetscScalar *array; 2816 2817 PetscFunctionBegin; 2818 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected!
Call MatFactorSetSchurIS() to enable it"); 2819 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2820 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2821 PetscCall(MatSetType(St, MATDENSE)); 2822 PetscCall(MatSetUp(St)); 2823 PetscCall(MatDenseGetArray(St, &array)); 2824 if (!mumps->sym) { /* MUMPS always returns a full matrix */ 2825 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2826 PetscInt i, j, N = mumps->id.size_schur; 2827 for (i = 0; i < N; i++) { 2828 for (j = 0; j < N; j++) { 2829 #if !defined(PETSC_USE_COMPLEX) 2830 PetscScalar val = mumps->id.schur[i * N + j]; 2831 #else 2832 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2833 #endif 2834 array[j * N + i] = val; 2835 } 2836 } 2837 } else { /* stored by columns */ 2838 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2839 } 2840 } else { /* either full or lower-triangular (not packed) */ 2841 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2842 PetscInt i, j, N = mumps->id.size_schur; 2843 for (i = 0; i < N; i++) { 2844 for (j = i; j < N; j++) { 2845 #if !defined(PETSC_USE_COMPLEX) 2846 PetscScalar val = mumps->id.schur[i * N + j]; 2847 #else 2848 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2849 #endif 2850 array[i * N + j] = array[j * N + i] = val; 2851 } 2852 } 2853 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2854 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2855 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2856 PetscInt i, j, N = mumps->id.size_schur; 2857 for (i = 0; i < N; i++) { 2858 for (j = 0; j < i + 1; j++) { 2859 #if !defined(PETSC_USE_COMPLEX) 2860 PetscScalar val = mumps->id.schur[i * N + j]; 2861 #else 2862 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2863 #endif 2864 array[i * N + j] = array[j * N + i] = val; 2865 } 2866 } 2867 } 2868 } 2869 PetscCall(MatDenseRestoreArray(St, &array)); 2870 *S = St; 2871 PetscFunctionReturn(PETSC_SUCCESS); 2872 } 2873 2874 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2875 { 2876 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2877 2878 PetscFunctionBegin; 2879 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2880 PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2881 for (i = 0; i < nICNTL_pre; ++i) 2882 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2883 if (i == nICNTL_pre) { /* not already cached */ 2884 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2885 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2886 mumps->ICNTL_pre[0]++; 2887 } 2888 mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl; 2889 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2890 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2891 PetscFunctionReturn(PETSC_SUCCESS); 2892 } 2893 2894 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2895 { 2896 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2897 2898 PetscFunctionBegin; 2899 if (mumps->id.job == JOB_NULL) { 2900 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ?
mumps->ICNTL_pre[0] : 0; 2901 *ival = 0; 2902 for (i = 0; i < nICNTL_pre; ++i) { 2903 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2904 } 2905 } else *ival = mumps->id.ICNTL(icntl); 2906 PetscFunctionReturn(PETSC_SUCCESS); 2907 } 2908 2909 /*@ 2910 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 2911 2912 Logically Collective 2913 2914 Input Parameters: 2915 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 2916 . icntl - index of MUMPS parameter array `ICNTL()` 2917 - ival - value of MUMPS `ICNTL(icntl)` 2918 2919 Options Database Key: 2920 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival` 2921 2922 Level: beginner 2923 2924 Note: 2925 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 2926 2927 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2928 @*/ 2929 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2930 { 2931 PetscFunctionBegin; 2932 PetscValidType(F, 1); 2933 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2934 PetscValidLogicalCollectiveInt(F, icntl, 2); 2935 PetscValidLogicalCollectiveInt(F, ival, 3); 2936 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2937 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2938 PetscFunctionReturn(PETSC_SUCCESS); 2939 } 2940 2941 /*@ 2942 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 2943 2944 Logically Collective 2945 2946 Input Parameters: 2947 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 2948 - icntl - index of MUMPS parameter array ICNTL() 2949 2950 Output Parameter: 2951 . ival - value of MUMPS ICNTL(icntl) 2952 2953 Level: beginner 2954 2955 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2956 @*/ 2957 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2958 { 2959 PetscFunctionBegin; 2960 PetscValidType(F, 1); 2961 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2962 PetscValidLogicalCollectiveInt(F, icntl, 2); 2963 PetscAssertPointer(ival, 3); 2964 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2965 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2966 PetscFunctionReturn(PETSC_SUCCESS); 2967 } 2968 2969 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2970 { 2971 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2972 2973 PetscFunctionBegin; 2974 if (mumps->id.job == JOB_NULL) { 2975 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 2976 for (i = 0; i < nCNTL_pre; ++i) 2977 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2978 if (i == nCNTL_pre) { 2979 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2980 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2981 mumps->CNTL_pre[0]++; 2982 } 2983 mumps->CNTL_pre[1 + 2 * i] = icntl; 2984 mumps->CNTL_pre[2 + 2 * i] = val; 2985 } else mumps->id.CNTL(icntl) = val; 2986 PetscFunctionReturn(PETSC_SUCCESS); 2987 } 2988 2989 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2990 { 2991 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2992 2993 PetscFunctionBegin; 2994 if (mumps->id.job == JOB_NULL) { 2995 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2996 *val = 0.0; 2997 for (i = 0; i < nCNTL_pre; ++i) { 2998 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2999 } 3000 } else *val = mumps->id.CNTL(icntl); 3001 PetscFunctionReturn(PETSC_SUCCESS); 3002 } 3003 3004 /*@ 3005 MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3006 3007 Logically Collective 3008 3009 Input Parameters: 3010 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3011 . icntl - index of MUMPS parameter array `CNTL()` 3012 - val - value of MUMPS `CNTL(icntl)` 3013 3014 Options Database Key: 3015 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 3016 3017 Level: beginner 3018 3019 Note: 3020 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 3021 3022 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3023 @*/ 3024 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 3025 { 3026 PetscFunctionBegin; 3027 PetscValidType(F, 1); 3028 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3029 PetscValidLogicalCollectiveInt(F, icntl, 2); 3030 PetscValidLogicalCollectiveReal(F, val, 3); 3031 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3032 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 3033 PetscFunctionReturn(PETSC_SUCCESS); 3034 } 3035 3036 /*@ 3037 MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3038 3039 Logically Collective 3040 3041 Input Parameters: 3042 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3043 - icntl - index of MUMPS parameter array CNTL() 3044 3045 Output Parameter: 3046 . 
val - value of MUMPS CNTL(icntl) 3047 3048 Level: beginner 3049 3050 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3051 @*/ 3052 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 3053 { 3054 PetscFunctionBegin; 3055 PetscValidType(F, 1); 3056 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3057 PetscValidLogicalCollectiveInt(F, icntl, 2); 3058 PetscAssertPointer(val, 3); 3059 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3060 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3061 PetscFunctionReturn(PETSC_SUCCESS); 3062 } 3063 3064 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 3065 { 3066 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3067 3068 PetscFunctionBegin; 3069 *info = mumps->id.INFO(icntl); 3070 PetscFunctionReturn(PETSC_SUCCESS); 3071 } 3072 3073 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 3074 { 3075 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3076 3077 PetscFunctionBegin; 3078 *infog = mumps->id.INFOG(icntl); 3079 PetscFunctionReturn(PETSC_SUCCESS); 3080 } 3081 3082 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 3083 { 3084 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3085 3086 PetscFunctionBegin; 3087 *rinfo = mumps->id.RINFO(icntl); 3088 PetscFunctionReturn(PETSC_SUCCESS); 3089 } 3090 3091 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 3092 { 3093 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3094 3095 PetscFunctionBegin; 3096 *rinfog = mumps->id.RINFOG(icntl); 3097 PetscFunctionReturn(PETSC_SUCCESS); 3098 } 3099 3100 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 3101 { 3102 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3103 3104 PetscFunctionBegin; 3105 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 3106 *size = 0; 3107 *array = NULL; 3108 if (!mumps->myid) { 3109 *size = mumps->id.INFOG(28); 3110 PetscCall(PetscMalloc1(*size, array)); 3111 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 3112 } 3113 PetscFunctionReturn(PETSC_SUCCESS); 3114 } 3115 3116 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 3117 { 3118 Mat Bt = NULL, Btseq = NULL; 3119 PetscBool flg; 3120 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3121 PetscScalar *aa; 3122 PetscInt spnr, *ia, *ja, M, nrhs; 3123 3124 PetscFunctionBegin; 3125 PetscAssertPointer(spRHS, 2); 3126 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 3127 PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 3128 PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 3129 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 3130 3131 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 3132 3133 if (mumps->petsc_size > 1) { 3134 
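/* Descriptive note (added): in parallel, Bt (the matrix underlying the MATTRANSPOSEVIRTUAL spRHS) is assumed to be MATMPIAIJ; take its local diagonal SeqAIJ block. Only rank 0 hands this block to MUMPS below as the centralized sparse right-hand side */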
Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 3135 Btseq = b->A; 3136 } else { 3137 Btseq = Bt; 3138 } 3139 3140 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 3141 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 3142 PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs)); 3143 mumps->id.rhs = NULL; 3144 3145 if (!mumps->myid) { 3146 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 3147 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3148 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3149 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3150 mumps->id.rhs_sparse = (MumpsScalar *)aa; 3151 } else { 3152 mumps->id.irhs_ptr = NULL; 3153 mumps->id.irhs_sparse = NULL; 3154 mumps->id.nz_rhs = 0; 3155 mumps->id.rhs_sparse = NULL; 3156 } 3157 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3158 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3159 3160 /* solve phase */ 3161 mumps->id.job = JOB_SOLVE; 3162 PetscMUMPS_c(mumps); 3163 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 3164 3165 if (!mumps->myid) { 3166 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3167 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3168 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3169 } 3170 PetscFunctionReturn(PETSC_SUCCESS); 3171 } 3172 3173 /*@ 3174 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc> 3175 3176 Logically Collective 3177 3178 Input Parameter: 3179 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3180 3181 Output Parameter: 3182 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3183 3184 Level: beginner 3185 3186 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3187 @*/ 3188 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3189 { 3190 PetscFunctionBegin; 3191 PetscValidType(F, 1); 3192 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3193 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3194 PetscFunctionReturn(PETSC_SUCCESS); 3195 } 3196 3197 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3198 { 3199 Mat spRHS; 3200 3201 PetscFunctionBegin; 3202 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3203 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3204 PetscCall(MatDestroy(&spRHS)); 3205 PetscFunctionReturn(PETSC_SUCCESS); 3206 } 3207 3208 /*@ 3209 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc> 3210 3211 Logically Collective 3212 3213 Input Parameter: 3214 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3215 3216 Output Parameter: 3217 . 
spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3218 3219 Level: beginner 3220 3221 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3222 @*/ 3223 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3224 { 3225 PetscBool flg; 3226 3227 PetscFunctionBegin; 3228 PetscValidType(F, 1); 3229 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3230 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3231 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3232 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3233 PetscFunctionReturn(PETSC_SUCCESS); 3234 } 3235 3236 static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3237 { 3238 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3239 3240 PetscFunctionBegin; 3241 if (nblk) { 3242 PetscAssertPointer(blkptr, 4); 3243 PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk)); 3244 PetscCall(PetscFree(mumps->id.blkptr)); 3245 PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr)); 3246 for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i)); 3247 mumps->id.ICNTL(15) = 1; 3248 if (blkvar) { 3249 PetscCall(PetscFree(mumps->id.blkvar)); 3250 PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar)); 3251 for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i)); 3252 } 3253 } else { 3254 PetscCall(PetscFree(mumps->id.blkptr)); 3255 PetscCall(PetscFree(mumps->id.blkvar)); 3256 mumps->id.ICNTL(15) = 0; 3257 } 3258 PetscFunctionReturn(PETSC_SUCCESS); 3259 } 3260 3261 /*@ 3262 MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1` 3263 3264 Not collective, only relevant on the first process of the MPI communicator 3265 3266 Input Parameters: 3267 + F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3268 . nblk - the number of blocks 3269 . 
blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk` 3270 - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks 3271 3272 Level: advanced 3273 3274 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()` 3275 @*/ 3276 PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3277 { 3278 PetscFunctionBegin; 3279 PetscValidType(F, 1); 3280 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3281 PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr)); 3282 PetscFunctionReturn(PETSC_SUCCESS); 3283 } 3284 3285 /*@ 3286 MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc> 3287 3288 Logically Collective 3289 3290 Input Parameters: 3291 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3292 - icntl - index of MUMPS parameter array INFO() 3293 3294 Output Parameter: 3295 . ival - value of MUMPS INFO(icntl) 3296 3297 Level: beginner 3298 3299 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3300 @*/ 3301 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3302 { 3303 PetscFunctionBegin; 3304 PetscValidType(F, 1); 3305 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3306 PetscAssertPointer(ival, 3); 3307 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3308 PetscFunctionReturn(PETSC_SUCCESS); 3309 } 3310 3311 /*@ 3312 MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc> 3313 3314 Logically Collective 3315 3316 Input Parameters: 3317 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3318 - icntl - index of MUMPS parameter array INFOG() 3319 3320 Output Parameter: 3321 . 
ival - value of MUMPS INFOG(icntl) 3322 3323 Level: beginner 3324 3325 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3326 @*/ 3327 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3328 { 3329 PetscFunctionBegin; 3330 PetscValidType(F, 1); 3331 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3332 PetscAssertPointer(ival, 3); 3333 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3334 PetscFunctionReturn(PETSC_SUCCESS); 3335 } 3336 3337 /*@ 3338 MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc> 3339 3340 Logically Collective 3341 3342 Input Parameters: 3343 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3344 - icntl - index of MUMPS parameter array RINFO() 3345 3346 Output Parameter: 3347 . val - value of MUMPS RINFO(icntl) 3348 3349 Level: beginner 3350 3351 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3352 @*/ 3353 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3354 { 3355 PetscFunctionBegin; 3356 PetscValidType(F, 1); 3357 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3358 PetscAssertPointer(val, 3); 3359 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3360 PetscFunctionReturn(PETSC_SUCCESS); 3361 } 3362 3363 /*@ 3364 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc> 3365 3366 Logically Collective 3367 3368 Input Parameters: 3369 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3370 - icntl - index of MUMPS parameter array RINFOG() 3371 3372 Output Parameter: 3373 . val - value of MUMPS RINFOG(icntl) 3374 3375 Level: beginner 3376 3377 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3378 @*/ 3379 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3380 { 3381 PetscFunctionBegin; 3382 PetscValidType(F, 1); 3383 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3384 PetscAssertPointer(val, 3); 3385 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3386 PetscFunctionReturn(PETSC_SUCCESS); 3387 } 3388 3389 /*@ 3390 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc> 3391 3392 Logically Collective 3393 3394 Input Parameter: 3395 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3396 3397 Output Parameters: 3398 + size - local size of the array. 
The size of the array is non-zero only on MPI rank 0 3399 - array - array of rows with null pivots; these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3400 for freeing this array. 3401 3402 Level: beginner 3403 3404 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3405 @*/ 3406 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3407 { 3408 PetscFunctionBegin; 3409 PetscValidType(F, 1); 3410 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3411 PetscAssertPointer(size, 2); 3412 PetscAssertPointer(array, 3); 3413 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3414 PetscFunctionReturn(PETSC_SUCCESS); 3415 } 3416 3417 /*MC 3418 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3419 MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc> 3420 3421 Works with `MATAIJ` and `MATSBAIJ` matrices 3422 3423 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3424 3425 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3426 See details below. 3427 3428 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3429 3430 Options Database Keys: 3431 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3432 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 3433 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3434 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3435 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 3436 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3437 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3438 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3439 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3440 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3441 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3442 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3443 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3444 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3445 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3446 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3447 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3448 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor 3449 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3450 .
-mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3451 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3452 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 3453 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3454 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3455 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3456 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3457 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3458 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3459 . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB) 3460 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3461 . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism 3462 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3463 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3464 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3465 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3466 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3467 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3468 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3469 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3470 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3471 3472 Level: beginner 3473 3474 Notes: 3475 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will 3476 error if the matrix is Hermitian. 3477 3478 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3479 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3480 3481 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3482 the failure with 3483 .vb 3484 KSPGetPC(ksp,&pc); 3485 PCFactorGetMatrix(pc,&mat); 3486 MatMumpsGetInfo(mat,....); 3487 MatMumpsGetInfog(mat,....); etc. 3488 .ve 3489 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3490 3491 MUMPS provides 64-bit integer support in two build modes: 3492 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3493 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3494 3495 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3496 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3497 columns. 
This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3498 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3499 3500 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3501 3502 Two modes to run MUMPS/PETSc with OpenMP 3503 .vb 3504 Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3505 threads per rank, then you may use "export `OMP_NUM_THREADS` = 16 && mpirun -n 4 ./test". 3506 .ve 3507 3508 .vb 3509 Use `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example, 3510 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3511 .ve 3512 3513 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3514 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3515 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3516 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3517 (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided). 3518 3519 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3520 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3521 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3522 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (which might be set 3523 by CPU binding policies in job scripts) and make these CPUs available to the master so that the OMP threads spawned by MUMPS can run on them. 3524 On a multi-socket compute node, MPI rank mapping is an issue. Still using the above example, suppose your compute node has two sockets: 3525 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind 3526 MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and the threads it spawns) will use half the cores in socket 0 and half 3527 the cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3528 problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding. 3529 For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and 3530 examine the mapping result. 3531 3532 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3533 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread.
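As an illustration only (the exact commands depend on your scheduler and executable name), a Slurm job-script fragment combining these settings with the options mentioned above might look like
.vb
  export OMP_PROC_BIND=spread
  export OMP_PLACES=threads
  srun --cpu-bind=verbose -m block:block ./test -mat_mumps_use_omp_threads 16
.ve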
One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3534 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3535 3536 See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 3537 3538 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3539 M*/ 3540 3541 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 3542 { 3543 PetscFunctionBegin; 3544 *type = MATSOLVERMUMPS; 3545 PetscFunctionReturn(PETSC_SUCCESS); 3546 } 3547 3548 /* MatGetFactor for Seq and MPI AIJ matrices */ 3549 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3550 { 3551 Mat B; 3552 Mat_MUMPS *mumps; 3553 PetscBool isSeqAIJ, isDiag, isDense; 3554 PetscMPIInt size; 3555 3556 PetscFunctionBegin; 3557 #if defined(PETSC_USE_COMPLEX) 3558 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3559 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3560 *F = NULL; 3561 PetscFunctionReturn(PETSC_SUCCESS); 3562 } 3563 #endif 3564 /* Create the factorization matrix */ 3565 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3566 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3567 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3568 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3569 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3570 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3571 PetscCall(MatSetUp(B)); 3572 3573 PetscCall(PetscNew(&mumps)); 3574 3575 B->ops->view = MatView_MUMPS; 3576 B->ops->getinfo = MatGetInfo_MUMPS; 3577 3578 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3579 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3580 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3581 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3582 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3583 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3584 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3585 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3586 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3587 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3588 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3589 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3590 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3591 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3592 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 3593 3594 if (ftype == MAT_FACTOR_LU) { 3595 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3596 B->factortype = MAT_FACTOR_LU; 3597 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3598 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3599 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3600 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3601 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3602 mumps->sym = 0; 3603 } else { 3604 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3605 B->factortype = MAT_FACTOR_CHOLESKY; 3606 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3607 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3608 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3609 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3610 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3611 #if defined(PETSC_USE_COMPLEX) 3612 mumps->sym = 2; 3613 #else 3614 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3615 else mumps->sym = 2; 3616 #endif 3617 } 3618 3619 /* set solvertype */ 3620 PetscCall(PetscFree(B->solvertype)); 3621 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3622 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3623 if (size == 1) { 3624 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3625 B->canuseordering = PETSC_TRUE; 3626 } 3627 B->ops->destroy = MatDestroy_MUMPS; 3628 B->data = (void *)mumps; 3629 3630 *F = B; 3631 mumps->id.job = JOB_NULL; 3632 mumps->ICNTL_pre = NULL; 3633 mumps->CNTL_pre = NULL; 3634 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3635 PetscFunctionReturn(PETSC_SUCCESS); 3636 } 3637 3638 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3639 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 3640 { 3641 Mat B; 3642 Mat_MUMPS *mumps; 3643 PetscBool isSeqSBAIJ; 3644 PetscMPIInt size; 3645 3646 PetscFunctionBegin; 3647 #if defined(PETSC_USE_COMPLEX) 3648 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3649 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;
#else
  if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
  else mumps->sym = 2;
#endif

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqBAIJ;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetFactor for Seq and MPI SELL matrices */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqSELL;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
    mumps->sym = 0;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetFactor for MATNEST matrices */
static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B, **mats;
  Mat_MUMPS  *mumps;
  PetscInt    nr, nc;
  PetscMPIInt size;
  PetscBool   flg = PETSC_TRUE;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif

  /* Return if some condition is not satisfied */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;

      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
            PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
          PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
        PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
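/* A minimal usage sketch (illustrative only, kept out of compilation): once the factor getters above are
   registered, a user can reach the MUMPS LU factorization either through KSP/PC (e.g. with the run-time
   options -pc_type lu -pc_factor_mat_solver_type mumps) or directly via the generic MatGetFactor() path
   shown below. The Mat A and Vecs b, x are assumed to be already created and assembled by the caller, and
   error handling beyond PetscCall() is omitted for brevity. */
#if 0
static PetscErrorCode SolveWithMUMPS(Mat A, Vec b, Vec x)
{
  Mat           F;
  IS            rperm, cperm;
  MatFactorInfo info;

  PetscFunctionBegin;
  PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
  PetscCall(MatGetOrdering(A, MATORDERINGEXTERNAL, &rperm, &cperm)); /* let MUMPS compute its own ordering */
  PetscCall(MatFactorInfoInitialize(&info));
  PetscCall(MatLUFactorSymbolic(F, A, rperm, cperm, &info));
  PetscCall(MatLUFactorNumeric(F, A, &info));
  PetscCall(MatSolve(F, b, x));
  PetscCall(ISDestroy(&rperm));
  PetscCall(ISDestroy(&cperm));
  PetscCall(MatDestroy(&F));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif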