1 /* 2 Provides an interface to the MUMPS sparse solver 3 */ 4 #include <petscpkg_version.h> 5 #include <petscsf.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 11 12 EXTERN_C_BEGIN 13 #if defined(PETSC_USE_COMPLEX) 14 #if defined(PETSC_USE_REAL_SINGLE) 15 #include <cmumps_c.h> 16 #else 17 #include <zmumps_c.h> 18 #endif 19 #else 20 #if defined(PETSC_USE_REAL_SINGLE) 21 #include <smumps_c.h> 22 #else 23 #include <dmumps_c.h> 24 #endif 25 #endif 26 EXTERN_C_END 27 #define JOB_INIT -1 28 #define JOB_NULL 0 29 #define JOB_FACTSYMBOLIC 1 30 #define JOB_FACTNUMERIC 2 31 #define JOB_SOLVE 3 32 #define JOB_END -2 33 34 /* calls to MUMPS */ 35 #if defined(PETSC_USE_COMPLEX) 36 #if defined(PETSC_USE_REAL_SINGLE) 37 #define MUMPS_c cmumps_c 38 #else 39 #define MUMPS_c zmumps_c 40 #endif 41 #else 42 #if defined(PETSC_USE_REAL_SINGLE) 43 #define MUMPS_c smumps_c 44 #else 45 #define MUMPS_c dmumps_c 46 #endif 47 #endif 48 49 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 50 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 51 naming convention in PetscMPIInt, PetscBLASInt etc. 52 */ 53 typedef MUMPS_INT PetscMUMPSInt; 54 55 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 56 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 57 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 58 #endif 59 #else 60 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 61 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 62 #endif 63 #endif 64 65 #define MPIU_MUMPSINT MPI_INT 66 #define PETSC_MUMPS_INT_MAX 2147483647 67 #define PETSC_MUMPS_INT_MIN -2147483648 68 69 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 70 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 71 { 72 PetscFunctionBegin; 73 #if PetscDefined(USE_64BIT_INDICES) 74 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 75 #endif 76 *b = (PetscMUMPSInt)(a); 77 PetscFunctionReturn(PETSC_SUCCESS); 78 } 79 80 /* Put these utility routines here since they are only used in this file */ 81 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 82 { 83 PetscInt myval; 84 PetscBool myset; 85 PetscFunctionBegin; 86 /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 87 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 88 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 89 if (set) *set = myset; 90 PetscFunctionReturn(PETSC_SUCCESS); 91 } 92 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 93 94 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 95 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 96 #define PetscMUMPS_c(mumps) \ 97 do { \ 98 if (mumps->use_petsc_omp_support) { \ 99 if (mumps->is_omp_master) { \ 100 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 101 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 102 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 103 PetscCall(PetscFPTrapPop()); \ 104 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 105 } \ 106 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 107 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 108 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 109 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 110 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 111 */ \ 112 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 113 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 114 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 115 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 116 } else { \ 117 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 118 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 119 PetscCall(PetscFPTrapPop()); \ 120 } \ 121 } while (0) 122 #else 123 #define PetscMUMPS_c(mumps) \ 124 do { \ 125 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 126 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 127 PetscCall(PetscFPTrapPop()); \ 128 } while (0) 129 #endif 130 131 /* declare MumpsScalar */ 132 #if defined(PETSC_USE_COMPLEX) 133 #if defined(PETSC_USE_REAL_SINGLE) 134 #define MumpsScalar mumps_complex 135 #else 136 #define MumpsScalar mumps_double_complex 137 #endif 138 #else 139 #define MumpsScalar PetscScalar 140 #endif 141 142 /* macros s.t. indices match MUMPS documentation */ 143 #define ICNTL(I) icntl[(I)-1] 144 #define CNTL(I) cntl[(I)-1] 145 #define INFOG(I) infog[(I)-1] 146 #define INFO(I) info[(I)-1] 147 #define RINFOG(I) rinfog[(I)-1] 148 #define RINFO(I) rinfo[(I)-1] 149 150 typedef struct Mat_MUMPS Mat_MUMPS; 151 struct Mat_MUMPS { 152 #if defined(PETSC_USE_COMPLEX) 153 #if defined(PETSC_USE_REAL_SINGLE) 154 CMUMPS_STRUC_C id; 155 #else 156 ZMUMPS_STRUC_C id; 157 #endif 158 #else 159 #if defined(PETSC_USE_REAL_SINGLE) 160 SMUMPS_STRUC_C id; 161 #else 162 DMUMPS_STRUC_C id; 163 #endif 164 #endif 165 166 MatStructure matstruc; 167 PetscMPIInt myid, petsc_size; 168 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. 
*/ 169 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 170 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 171 PetscMUMPSInt sym; 172 MPI_Comm mumps_comm; 173 PetscMUMPSInt *ICNTL_pre; 174 PetscReal *CNTL_pre; 175 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 176 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 177 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 178 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 179 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 180 PetscInt *rhs_nrow, max_nrhs; 181 PetscMPIInt *rhs_recvcounts, *rhs_disps; 182 PetscScalar *rhs_loc, *rhs_recvbuf; 183 #endif 184 Vec b_seq, x_seq; 185 PetscInt ninfo, *info; /* which INFO to display */ 186 PetscInt sizeredrhs; 187 PetscScalar *schur_sol; 188 PetscInt schur_sizesol; 189 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 190 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 191 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 192 193 /* Support for MATNEST */ 194 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 195 PetscInt64 *nest_vals_start; 196 PetscScalar *nest_vals; 197 198 /* stuff used by petsc/mumps OpenMP support*/ 199 PetscBool use_petsc_omp_support; 200 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 201 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 202 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 203 PetscMPIInt tag, omp_comm_size; 204 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 205 MPI_Request *reqs; 206 }; 207 208 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 209 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 210 */ 211 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 212 { 213 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 214 215 PetscFunctionBegin; 216 #if defined(PETSC_USE_64BIT_INDICES) 217 { 218 PetscInt i; 219 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 220 PetscCall(PetscFree(mumps->ia_alloc)); 221 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 222 mumps->cur_ilen = nrow + 1; 223 } 224 if (nnz > mumps->cur_jlen) { 225 PetscCall(PetscFree(mumps->ja_alloc)); 226 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 227 mumps->cur_jlen = nnz; 228 } 229 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 230 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 231 *ia_mumps = mumps->ia_alloc; 232 *ja_mumps = mumps->ja_alloc; 233 } 234 #else 235 *ia_mumps = ia; 236 *ja_mumps = ja; 237 #endif 238 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 243 { 244 PetscFunctionBegin; 245 PetscCall(PetscFree(mumps->id.listvar_schur)); 246 PetscCall(PetscFree(mumps->id.redrhs)); 247 PetscCall(PetscFree(mumps->schur_sol)); 248 mumps->id.size_schur = 0; 249 mumps->id.schur_lld = 0; 250 mumps->id.ICNTL(19) = 0; 251 PetscFunctionReturn(PETSC_SUCCESS); 252 } 253 254 /* solve with rhs in mumps->id.redrhs and return in the same location */ 255 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 256 { 257 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 258 Mat S, B, X; 259 MatFactorSchurStatus schurstatus; 260 PetscInt sizesol; 261 262 PetscFunctionBegin; 263 PetscCall(MatFactorFactorizeSchurComplement(F)); 264 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 265 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 266 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 267 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 268 PetscCall(MatBindToCPU(B, S->boundtocpu)); 269 #endif 270 switch (schurstatus) { 271 case MAT_FACTOR_SCHUR_FACTORED: 272 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 273 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 274 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 275 PetscCall(MatBindToCPU(X, S->boundtocpu)); 276 #endif 277 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 278 PetscCall(MatMatSolveTranspose(S, B, X)); 279 } else { 280 PetscCall(MatMatSolve(S, B, X)); 281 } 282 break; 283 case MAT_FACTOR_SCHUR_INVERTED: 284 sizesol = mumps->id.nrhs * mumps->id.size_schur; 285 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 286 PetscCall(PetscFree(mumps->schur_sol)); 287 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 288 mumps->schur_sizesol = sizesol; 289 } 290 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 291 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 292 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 293 PetscCall(MatBindToCPU(X, S->boundtocpu)); 294 #endif 295 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 296 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 297 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 298 } else { 299 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 300 } 301 PetscCall(MatProductSetFromOptions(X)); 302 PetscCall(MatProductSymbolic(X)); 303 PetscCall(MatProductNumeric(X)); 304 305 
PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 306 break; 307 default: 308 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 309 } 310 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 311 PetscCall(MatDestroy(&B)); 312 PetscCall(MatDestroy(&X)); 313 PetscFunctionReturn(PETSC_SUCCESS); 314 } 315 316 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 317 { 318 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 319 320 PetscFunctionBegin; 321 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 if (!expansion) { /* prepare for the condensation step */ 325 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 326 /* allocate MUMPS internal array to store reduced right-hand sides */ 327 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 328 PetscCall(PetscFree(mumps->id.redrhs)); 329 mumps->id.lredrhs = mumps->id.size_schur; 330 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 331 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 332 } 333 } else { /* prepare for the expansion step */ 334 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 335 PetscCall(MatMumpsSolveSchur_Private(F)); 336 mumps->id.ICNTL(26) = 2; /* expansion phase */ 337 PetscMUMPS_c(mumps); 338 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 339 /* restore defaults */ 340 mumps->id.ICNTL(26) = -1; 341 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 342 if (mumps->id.nrhs > 1) { 343 PetscCall(PetscFree(mumps->id.redrhs)); 344 mumps->id.lredrhs = 0; 345 mumps->sizeredrhs = 0; 346 } 347 } 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 /* 352 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 353 354 input: 355 A - matrix in aij,baij or sbaij format 356 shift - 0: C style output triple; 1: Fortran style output triple. 357 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 358 MAT_REUSE_MATRIX: only the values in v array are updated 359 output: 360 nnz - dim of r, c, and v (number of local nonzero entries of A) 361 r, c, v - row and col index, matrix values (matrix triples) 362 363 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 364 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 365 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
366 367 */ 368 369 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 370 { 371 const PetscScalar *av; 372 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 373 PetscInt64 nz, rnz, i, j, k; 374 PetscMUMPSInt *row, *col; 375 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 376 377 PetscFunctionBegin; 378 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 379 if (reuse == MAT_INITIAL_MATRIX) { 380 nz = aa->nz; 381 ai = aa->i; 382 aj = aa->j; 383 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 384 for (i = k = 0; i < M; i++) { 385 rnz = ai[i + 1] - ai[i]; 386 ajj = aj + ai[i]; 387 for (j = 0; j < rnz; j++) { 388 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 389 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 390 k++; 391 } 392 } 393 mumps->val = (PetscScalar *)av; 394 mumps->irn = row; 395 mumps->jcn = col; 396 mumps->nnz = nz; 397 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */ 398 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 399 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 404 { 405 PetscInt64 nz, i, j, k, r; 406 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 407 PetscMUMPSInt *row, *col; 408 409 PetscFunctionBegin; 410 nz = a->sliidx[a->totalslices]; 411 if (reuse == MAT_INITIAL_MATRIX) { 412 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 413 for (i = k = 0; i < a->totalslices; i++) { 414 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 415 } 416 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 417 mumps->irn = row; 418 mumps->jcn = col; 419 mumps->nnz = nz; 420 mumps->val = a->val; 421 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */ 422 else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 427 { 428 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 429 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 430 PetscInt64 M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 431 PetscInt bs; 432 PetscMUMPSInt *row, *col; 433 434 PetscFunctionBegin; 435 if (reuse == MAT_INITIAL_MATRIX) { 436 PetscCall(MatGetBlockSize(A, &bs)); 437 M = A->rmap->N / bs; 438 ai = aa->i; 439 aj = aa->j; 440 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 441 for (i = 0; i < M; i++) { 442 ajj = aj + ai[i]; 443 rnz = ai[i + 1] - ai[i]; 444 for (k = 0; k < rnz; k++) { 445 for (j = 0; j < bs; j++) { 446 for (m = 0; m < bs; m++) { 447 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 448 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 449 idx++; 450 } 451 } 452 } 453 } 454 mumps->irn = row; 455 mumps->jcn = col; 456 mumps->nnz = nz; 457 mumps->val = aa->a; 458 } else if (mumps->nest_vals) 
PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 459 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 460 PetscFunctionReturn(PETSC_SUCCESS); 461 } 462 463 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 464 { 465 const PetscInt *ai, *aj, *ajj; 466 PetscInt bs; 467 PetscInt64 nz, rnz, i, j, k, m; 468 PetscMUMPSInt *row, *col; 469 PetscScalar *val; 470 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 471 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 472 #if defined(PETSC_USE_COMPLEX) 473 PetscBool isset, hermitian; 474 #endif 475 476 PetscFunctionBegin; 477 #if defined(PETSC_USE_COMPLEX) 478 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 479 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 480 #endif 481 ai = aa->i; 482 aj = aa->j; 483 PetscCall(MatGetBlockSize(A, &bs)); 484 if (reuse == MAT_INITIAL_MATRIX) { 485 const PetscInt64 alloc_size = aa->nz * bs2; 486 487 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 488 if (bs > 1) { 489 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 490 mumps->val = mumps->val_alloc; 491 } else { 492 mumps->val = aa->a; 493 } 494 mumps->irn = row; 495 mumps->jcn = col; 496 } else { 497 row = mumps->irn; 498 col = mumps->jcn; 499 } 500 val = mumps->val; 501 502 nz = 0; 503 if (bs > 1) { 504 for (i = 0; i < mbs; i++) { 505 rnz = ai[i + 1] - ai[i]; 506 ajj = aj + ai[i]; 507 for (j = 0; j < rnz; j++) { 508 for (k = 0; k < bs; k++) { 509 for (m = 0; m < bs; m++) { 510 if (ajj[j] > i || k >= m) { 511 if (reuse == MAT_INITIAL_MATRIX) { 512 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 513 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 514 } 515 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 516 } 517 } 518 } 519 } 520 } 521 } else if (reuse == MAT_INITIAL_MATRIX) { 522 for (i = 0; i < mbs; i++) { 523 rnz = ai[i + 1] - ai[i]; 524 ajj = aj + ai[i]; 525 for (j = 0; j < rnz; j++) { 526 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 527 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 528 nz++; 529 } 530 } 531 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 532 } else if (mumps->nest_vals) 533 PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 534 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 535 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 536 PetscFunctionReturn(PETSC_SUCCESS); 537 } 538 539 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 540 { 541 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 542 PetscInt64 nz, rnz, i, j; 543 const PetscScalar *av, *v1; 544 PetscScalar *val; 545 PetscMUMPSInt *row, *col; 546 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 547 PetscBool missing; 548 #if defined(PETSC_USE_COMPLEX) 549 PetscBool hermitian, isset; 550 #endif 551 552 PetscFunctionBegin; 553 #if 
defined(PETSC_USE_COMPLEX) 554 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 555 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 556 #endif 557 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 558 ai = aa->i; 559 aj = aa->j; 560 adiag = aa->diag; 561 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 562 if (reuse == MAT_INITIAL_MATRIX) { 563 /* count nz in the upper triangular part of A */ 564 nz = 0; 565 if (missing) { 566 for (i = 0; i < M; i++) { 567 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 568 for (j = ai[i]; j < ai[i + 1]; j++) { 569 if (aj[j] < i) continue; 570 nz++; 571 } 572 } else { 573 nz += ai[i + 1] - adiag[i]; 574 } 575 } 576 } else { 577 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 578 } 579 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 580 PetscCall(PetscMalloc1(nz, &val)); 581 mumps->nnz = nz; 582 mumps->irn = row; 583 mumps->jcn = col; 584 mumps->val = mumps->val_alloc = val; 585 586 nz = 0; 587 if (missing) { 588 for (i = 0; i < M; i++) { 589 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 590 for (j = ai[i]; j < ai[i + 1]; j++) { 591 if (aj[j] < i) continue; 592 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 593 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 594 val[nz] = av[j]; 595 nz++; 596 } 597 } else { 598 rnz = ai[i + 1] - adiag[i]; 599 ajj = aj + adiag[i]; 600 v1 = av + adiag[i]; 601 for (j = 0; j < rnz; j++) { 602 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 603 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 604 val[nz++] = v1[j]; 605 } 606 } 607 } 608 } else { 609 for (i = 0; i < M; i++) { 610 rnz = ai[i + 1] - adiag[i]; 611 ajj = aj + adiag[i]; 612 v1 = av + adiag[i]; 613 for (j = 0; j < rnz; j++) { 614 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 615 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 616 val[nz++] = v1[j]; 617 } 618 } 619 } 620 } else { 621 nz = 0; 622 val = mumps->val; 623 if (missing) { 624 for (i = 0; i < M; i++) { 625 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 626 for (j = ai[i]; j < ai[i + 1]; j++) { 627 if (aj[j] < i) continue; 628 val[nz++] = av[j]; 629 } 630 } else { 631 rnz = ai[i + 1] - adiag[i]; 632 v1 = av + adiag[i]; 633 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 634 } 635 } 636 } else { 637 for (i = 0; i < M; i++) { 638 rnz = ai[i + 1] - adiag[i]; 639 v1 = av + adiag[i]; 640 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 641 } 642 } 643 } 644 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 645 PetscFunctionReturn(PETSC_SUCCESS); 646 } 647 648 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 649 { 650 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 651 PetscInt bs; 652 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 653 PetscMUMPSInt *row, *col; 654 const PetscScalar *av, *bv, *v1, *v2; 655 PetscScalar *val; 656 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 657 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 658 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 659 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 660 #if defined(PETSC_USE_COMPLEX) 661 PetscBool hermitian, isset; 662 #endif 663 664 PetscFunctionBegin; 665 #if defined(PETSC_USE_COMPLEX) 666 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 667 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 668 #endif 669 
PetscCall(MatGetBlockSize(A, &bs)); 670 rstart = A->rmap->rstart; 671 ai = aa->i; 672 aj = aa->j; 673 bi = bb->i; 674 bj = bb->j; 675 av = aa->a; 676 bv = bb->a; 677 678 garray = mat->garray; 679 680 if (reuse == MAT_INITIAL_MATRIX) { 681 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 682 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 683 PetscCall(PetscMalloc1(nz, &val)); 684 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 685 mumps->irn = row; 686 mumps->jcn = col; 687 mumps->val = mumps->val_alloc = val; 688 } else { 689 val = mumps->val; 690 } 691 692 jj = 0; 693 irow = rstart; 694 for (i = 0; i < mbs; i++) { 695 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 696 countA = ai[i + 1] - ai[i]; 697 countB = bi[i + 1] - bi[i]; 698 bjj = bj + bi[i]; 699 v1 = av + ai[i] * bs2; 700 v2 = bv + bi[i] * bs2; 701 702 if (bs > 1) { 703 /* A-part */ 704 for (j = 0; j < countA; j++) { 705 for (k = 0; k < bs; k++) { 706 for (m = 0; m < bs; m++) { 707 if (rstart + ajj[j] * bs > irow || k >= m) { 708 if (reuse == MAT_INITIAL_MATRIX) { 709 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 710 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 711 } 712 val[jj++] = v1[j * bs2 + m + k * bs]; 713 } 714 } 715 } 716 } 717 718 /* B-part */ 719 for (j = 0; j < countB; j++) { 720 for (k = 0; k < bs; k++) { 721 for (m = 0; m < bs; m++) { 722 if (reuse == MAT_INITIAL_MATRIX) { 723 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 724 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 725 } 726 val[jj++] = v2[j * bs2 + m + k * bs]; 727 } 728 } 729 } 730 } else { 731 /* A-part */ 732 for (j = 0; j < countA; j++) { 733 if (reuse == MAT_INITIAL_MATRIX) { 734 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 735 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 736 } 737 val[jj++] = v1[j]; 738 } 739 740 /* B-part */ 741 for (j = 0; j < countB; j++) { 742 if (reuse == MAT_INITIAL_MATRIX) { 743 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 744 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 745 } 746 val[jj++] = v2[j]; 747 } 748 } 749 irow += bs; 750 } 751 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 752 PetscFunctionReturn(PETSC_SUCCESS); 753 } 754 755 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 756 { 757 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 758 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 759 PetscMUMPSInt *row, *col; 760 const PetscScalar *av, *bv, *v1, *v2; 761 PetscScalar *val; 762 Mat Ad, Ao; 763 Mat_SeqAIJ *aa; 764 Mat_SeqAIJ *bb; 765 766 PetscFunctionBegin; 767 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 768 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 769 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 770 771 aa = (Mat_SeqAIJ *)(Ad)->data; 772 bb = (Mat_SeqAIJ *)(Ao)->data; 773 ai = aa->i; 774 aj = aa->j; 775 bi = bb->i; 776 bj = bb->j; 777 778 rstart = A->rmap->rstart; 779 cstart = A->cmap->rstart; 780 781 if (reuse == MAT_INITIAL_MATRIX) { 782 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 783 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 784 PetscCall(PetscMalloc1(nz, &val)); 785 mumps->nnz = nz; 786 mumps->irn = row; 787 mumps->jcn = col; 788 mumps->val = mumps->val_alloc = val; 789 } else { 790 val = mumps->val; 791 } 792 793 jj = 0; 794 irow = rstart; 795 for (i = 0; i < m; i++) { 796 ajj = aj + 
ai[i]; /* ptr to the beginning of this row */ 797 countA = ai[i + 1] - ai[i]; 798 countB = bi[i + 1] - bi[i]; 799 bjj = bj + bi[i]; 800 v1 = av + ai[i]; 801 v2 = bv + bi[i]; 802 803 /* A-part */ 804 for (j = 0; j < countA; j++) { 805 if (reuse == MAT_INITIAL_MATRIX) { 806 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 807 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 808 } 809 val[jj++] = v1[j]; 810 } 811 812 /* B-part */ 813 for (j = 0; j < countB; j++) { 814 if (reuse == MAT_INITIAL_MATRIX) { 815 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 816 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 817 } 818 val[jj++] = v2[j]; 819 } 820 irow++; 821 } 822 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 823 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 824 PetscFunctionReturn(PETSC_SUCCESS); 825 } 826 827 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 828 { 829 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 830 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 831 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 832 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 833 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 834 const PetscInt bs2 = mat->bs2; 835 PetscInt bs; 836 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 837 PetscMUMPSInt *row, *col; 838 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 839 PetscScalar *val; 840 841 PetscFunctionBegin; 842 PetscCall(MatGetBlockSize(A, &bs)); 843 if (reuse == MAT_INITIAL_MATRIX) { 844 nz = bs2 * (aa->nz + bb->nz); 845 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 846 PetscCall(PetscMalloc1(nz, &val)); 847 mumps->nnz = nz; 848 mumps->irn = row; 849 mumps->jcn = col; 850 mumps->val = mumps->val_alloc = val; 851 } else { 852 val = mumps->val; 853 } 854 855 jj = 0; 856 irow = rstart; 857 for (i = 0; i < mbs; i++) { 858 countA = ai[i + 1] - ai[i]; 859 countB = bi[i + 1] - bi[i]; 860 ajj = aj + ai[i]; 861 bjj = bj + bi[i]; 862 v1 = av + bs2 * ai[i]; 863 v2 = bv + bs2 * bi[i]; 864 865 idx = 0; 866 /* A-part */ 867 for (k = 0; k < countA; k++) { 868 for (j = 0; j < bs; j++) { 869 for (n = 0; n < bs; n++) { 870 if (reuse == MAT_INITIAL_MATRIX) { 871 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 872 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 873 } 874 val[jj++] = v1[idx++]; 875 } 876 } 877 } 878 879 idx = 0; 880 /* B-part */ 881 for (k = 0; k < countB; k++) { 882 for (j = 0; j < bs; j++) { 883 for (n = 0; n < bs; n++) { 884 if (reuse == MAT_INITIAL_MATRIX) { 885 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 886 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 887 } 888 val[jj++] = v2[idx++]; 889 } 890 } 891 } 892 irow += bs; 893 } 894 PetscFunctionReturn(PETSC_SUCCESS); 895 } 896 897 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 898 { 899 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 900 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 901 PetscMUMPSInt *row, *col; 902 const PetscScalar *av, *bv, *v1, *v2; 903 PetscScalar *val; 904 Mat Ad, Ao; 905 Mat_SeqAIJ *aa; 906 Mat_SeqAIJ *bb; 907 #if defined(PETSC_USE_COMPLEX) 908 PetscBool hermitian, isset; 909 #endif 910 911 PetscFunctionBegin; 912 #if defined(PETSC_USE_COMPLEX) 913 PetscCall(MatIsHermitianKnown(A, 
&isset, &hermitian)); 914 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 915 #endif 916 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 917 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 918 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 919 920 aa = (Mat_SeqAIJ *)(Ad)->data; 921 bb = (Mat_SeqAIJ *)(Ao)->data; 922 ai = aa->i; 923 aj = aa->j; 924 adiag = aa->diag; 925 bi = bb->i; 926 bj = bb->j; 927 928 rstart = A->rmap->rstart; 929 930 if (reuse == MAT_INITIAL_MATRIX) { 931 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 932 nzb = 0; /* num of upper triangular entries in mat->B */ 933 for (i = 0; i < m; i++) { 934 nza += (ai[i + 1] - adiag[i]); 935 countB = bi[i + 1] - bi[i]; 936 bjj = bj + bi[i]; 937 for (j = 0; j < countB; j++) { 938 if (garray[bjj[j]] > rstart) nzb++; 939 } 940 } 941 942 nz = nza + nzb; /* total nz of upper triangular part of mat */ 943 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 944 PetscCall(PetscMalloc1(nz, &val)); 945 mumps->nnz = nz; 946 mumps->irn = row; 947 mumps->jcn = col; 948 mumps->val = mumps->val_alloc = val; 949 } else { 950 val = mumps->val; 951 } 952 953 jj = 0; 954 irow = rstart; 955 for (i = 0; i < m; i++) { 956 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 957 v1 = av + adiag[i]; 958 countA = ai[i + 1] - adiag[i]; 959 countB = bi[i + 1] - bi[i]; 960 bjj = bj + bi[i]; 961 v2 = bv + bi[i]; 962 963 /* A-part */ 964 for (j = 0; j < countA; j++) { 965 if (reuse == MAT_INITIAL_MATRIX) { 966 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 967 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 968 } 969 val[jj++] = v1[j]; 970 } 971 972 /* B-part */ 973 for (j = 0; j < countB; j++) { 974 if (garray[bjj[j]] > rstart) { 975 if (reuse == MAT_INITIAL_MATRIX) { 976 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 977 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 978 } 979 val[jj++] = v2[j]; 980 } 981 } 982 irow++; 983 } 984 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 985 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 986 PetscFunctionReturn(PETSC_SUCCESS); 987 } 988 989 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 990 { 991 const PetscScalar *av; 992 const PetscInt M = A->rmap->n; 993 PetscInt64 i; 994 PetscMUMPSInt *row, *col; 995 Vec v; 996 997 PetscFunctionBegin; 998 PetscCall(MatDiagonalGetDiagonal(A, &v)); 999 PetscCall(VecGetArrayRead(v, &av)); 1000 if (reuse == MAT_INITIAL_MATRIX) { 1001 PetscCall(PetscMalloc2(M, &row, M, &col)); 1002 for (i = 0; i < M; i++) { 1003 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1004 col[i] = row[i]; 1005 } 1006 mumps->val = (PetscScalar *)av; 1007 mumps->irn = row; 1008 mumps->jcn = col; 1009 mumps->nnz = M; 1010 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1011 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1012 PetscCall(VecRestoreArrayRead(v, &av)); 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1017 { 1018 Mat **mats; 1019 PetscInt nr, nc; 1020 PetscBool chol = 
mumps->sym ? PETSC_TRUE : PETSC_FALSE; 1021 1022 PetscFunctionBegin; 1023 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1024 if (reuse == MAT_INITIAL_MATRIX) { 1025 PetscMUMPSInt *irns, *jcns; 1026 PetscScalar *vals; 1027 PetscInt64 totnnz, cumnnz, maxnnz; 1028 PetscInt *pjcns_w; 1029 IS *rows, *cols; 1030 PetscInt **rows_idx, **cols_idx; 1031 1032 cumnnz = 0; 1033 maxnnz = 0; 1034 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1035 for (PetscInt r = 0; r < nr; r++) { 1036 for (PetscInt c = 0; c < nc; c++) { 1037 Mat sub = mats[r][c]; 1038 1039 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1040 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1041 if (sub) { 1042 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1043 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag; 1044 MatInfo info; 1045 1046 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1047 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1048 else { 1049 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1050 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1051 } 1052 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1053 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1054 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1055 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1056 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1057 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1058 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1059 1060 if (chol) { 1061 if (r == c) { 1062 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1063 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1064 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1065 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1066 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1067 } else { 1068 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1069 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1070 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1071 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1072 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1073 } 1074 } else { 1075 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1076 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1077 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1078 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1079 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1080 } 1081 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1082 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1083 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1084 cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for 
Cholesky */ 1085 maxnnz = PetscMax(maxnnz, info.nz_used); 1086 } 1087 } 1088 } 1089 1090 /* Allocate total COO */ 1091 totnnz = cumnnz; 1092 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1093 PetscCall(PetscMalloc1(totnnz, &vals)); 1094 1095 /* Handle rows and column maps 1096 We directly map rows and use an SF for the columns */ 1097 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1098 PetscCall(MatNestGetISs(A, rows, cols)); 1099 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1100 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1101 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1102 else (void)maxnnz; 1103 1104 cumnnz = 0; 1105 for (PetscInt r = 0; r < nr; r++) { 1106 for (PetscInt c = 0; c < nc; c++) { 1107 Mat sub = mats[r][c]; 1108 const PetscInt *ridx = rows_idx[r]; 1109 const PetscInt *cidx = cols_idx[c]; 1110 PetscInt rst; 1111 PetscSF csf; 1112 PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 1113 PetscLayout cmap; 1114 1115 mumps->nest_vals_start[r * nc + c] = cumnnz; 1116 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1117 1118 /* Extract inner blocks if needed */ 1119 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1120 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1121 else { 1122 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1123 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1124 } 1125 swap = (PetscBool)(isTrans || isHTrans); 1126 1127 /* Get column layout to map off-process columns */ 1128 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1129 1130 /* Get row start to map on-process rows */ 1131 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1132 1133 /* Directly use the mumps datastructure and use C ordering for now */ 1134 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1135 1136 /* Swap the role of rows and columns indices for transposed blocks 1137 since we need values with global final ordering */ 1138 if (swap) { 1139 cidx = rows_idx[r]; 1140 ridx = cols_idx[c]; 1141 } 1142 1143 /* Communicate column indices 1144 This could have been done with a single SF but it would have complicated the code a lot. 
1145 But since we do it only once, we pay the price of setting up an SF for each block */ 1146 if (PetscDefined(USE_64BIT_INDICES)) { 1147 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1148 } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 1149 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1150 PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1151 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1152 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1153 PetscCall(PetscSFDestroy(&csf)); 1154 1155 /* Import indices: use direct map for rows and mapped indices for columns */ 1156 if (swap) { 1157 for (PetscInt k = 0; k < mumps->nnz; k++) { 1158 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1159 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1160 } 1161 } else { 1162 for (PetscInt k = 0; k < mumps->nnz; k++) { 1163 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1164 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1165 } 1166 } 1167 1168 /* Import values to full COO */ 1169 PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1170 if (isHTrans) { /* conjugate the entries */ 1171 PetscScalar *v = vals + cumnnz; 1172 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]); 1173 } 1174 1175 /* Shift new starting point and sanity check */ 1176 cumnnz += mumps->nnz; 1177 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1178 1179 /* Free scratch memory */ 1180 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1181 PetscCall(PetscFree(mumps->val_alloc)); 1182 mumps->val = NULL; 1183 mumps->nnz = 0; 1184 } 1185 } 1186 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1187 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1188 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1189 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1190 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1191 mumps->nest_vals_start[nr * nc] = cumnnz; 1192 1193 /* Set pointers for final MUMPS data structure */ 1194 mumps->nest_vals = vals; 1195 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1196 mumps->val = vals; 1197 mumps->irn = irns; 1198 mumps->jcn = jcns; 1199 mumps->nnz = cumnnz; 1200 } else { 1201 PetscScalar *oval = mumps->nest_vals; 1202 for (PetscInt r = 0; r < nr; r++) { 1203 for (PetscInt c = 0; c < nc; c++) { 1204 PetscBool isTrans, isHTrans = PETSC_FALSE; 1205 Mat sub = mats[r][c]; 1206 PetscInt midx = r * nc + c; 1207 1208 if (!mumps->nest_convert_to_triples[midx]) continue; 1209 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1210 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1211 else { 1212 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1213 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1214 } 1215 mumps->val = oval + mumps->nest_vals_start[midx]; 1216 
PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1217 if (isHTrans) { 1218 PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1219 for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1220 } 1221 } 1222 } 1223 mumps->val = oval; 1224 } 1225 PetscFunctionReturn(PETSC_SUCCESS); 1226 } 1227 1228 static PetscErrorCode MatDestroy_MUMPS(Mat A) 1229 { 1230 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1231 1232 PetscFunctionBegin; 1233 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1234 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1235 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1236 PetscCall(VecDestroy(&mumps->b_seq)); 1237 PetscCall(VecDestroy(&mumps->x_seq)); 1238 PetscCall(PetscFree(mumps->id.perm_in)); 1239 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1240 PetscCall(PetscFree(mumps->val_alloc)); 1241 PetscCall(PetscFree(mumps->info)); 1242 PetscCall(PetscFree(mumps->ICNTL_pre)); 1243 PetscCall(PetscFree(mumps->CNTL_pre)); 1244 PetscCall(MatMumpsResetSchur_Private(mumps)); 1245 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1246 mumps->id.job = JOB_END; 1247 PetscMUMPS_c(mumps); 1248 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1249 if (mumps->mumps_comm != MPI_COMM_NULL) { 1250 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1251 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1252 } 1253 } 1254 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1255 if (mumps->use_petsc_omp_support) { 1256 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1257 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1258 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1259 } 1260 #endif 1261 PetscCall(PetscFree(mumps->ia_alloc)); 1262 PetscCall(PetscFree(mumps->ja_alloc)); 1263 PetscCall(PetscFree(mumps->recvcount)); 1264 PetscCall(PetscFree(mumps->reqs)); 1265 PetscCall(PetscFree(mumps->irhs_loc)); 1266 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1267 PetscCall(PetscFree(mumps->nest_vals)); 1268 PetscCall(PetscFree(A->data)); 1269 1270 /* clear composed functions */ 1271 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1272 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1273 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1274 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1275 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1276 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1277 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1278 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1279 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1280 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1281 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1282 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1283 
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1284 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1285 PetscFunctionReturn(PETSC_SUCCESS); 1286 } 1287 1288 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1289 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1290 { 1291 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1292 const PetscMPIInt ompsize = mumps->omp_comm_size; 1293 PetscInt i, m, M, rstart; 1294 1295 PetscFunctionBegin; 1296 PetscCall(MatGetSize(A, &M, NULL)); 1297 PetscCall(MatGetLocalSize(A, &m, NULL)); 1298 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1299 if (ompsize == 1) { 1300 if (!mumps->irhs_loc) { 1301 mumps->nloc_rhs = m; 1302 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1303 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1304 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1305 } 1306 mumps->id.rhs_loc = (MumpsScalar *)array; 1307 } else { 1308 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1309 const PetscInt *ranges; 1310 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1311 MPI_Group petsc_group, omp_group; 1312 PetscScalar *recvbuf = NULL; 1313 1314 if (mumps->is_omp_master) { 1315 /* Lazily initialize the omp stuff for distributed rhs */ 1316 if (!mumps->irhs_loc) { 1317 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1318 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1319 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1320 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1321 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1322 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1323 1324 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1325 mumps->nloc_rhs = 0; 1326 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1327 for (j = 0; j < ompsize; j++) { 1328 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1329 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1330 } 1331 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1332 for (j = k = 0; j < ompsize; j++) { 1333 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1334 } 1335 1336 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1337 PetscCallMPI(MPI_Group_free(&petsc_group)); 1338 PetscCallMPI(MPI_Group_free(&omp_group)); 1339 } 1340 1341 /* Realloc buffers when current nrhs is bigger than what we have met */ 1342 if (nrhs > mumps->max_nrhs) { 1343 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1344 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1345 mumps->max_nrhs = nrhs; 1346 } 1347 1348 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1349 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1350 mumps->rhs_disps[0] = 0; 1351 for (j = 1; j < ompsize; j++) { 1352 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1353 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1354 } 1355 recvbuf = 
(nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1356 } 1357 1358 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1359 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1360 1361 if (mumps->is_omp_master) { 1362 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1363 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1364 for (j = 0; j < ompsize; j++) { 1365 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1366 dst = dstbase; 1367 for (i = 0; i < nrhs; i++) { 1368 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1369 src += mumps->rhs_nrow[j]; 1370 dst += mumps->nloc_rhs; 1371 } 1372 dstbase += mumps->rhs_nrow[j]; 1373 } 1374 } 1375 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1376 } 1377 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1378 } 1379 mumps->id.nrhs = nrhs; 1380 mumps->id.nloc_rhs = mumps->nloc_rhs; 1381 mumps->id.lrhs_loc = mumps->nloc_rhs; 1382 mumps->id.irhs_loc = mumps->irhs_loc; 1383 PetscFunctionReturn(PETSC_SUCCESS); 1384 } 1385 1386 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1387 { 1388 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1389 const PetscScalar *rarray = NULL; 1390 PetscScalar *array; 1391 IS is_iden, is_petsc; 1392 PetscInt i; 1393 PetscBool second_solve = PETSC_FALSE; 1394 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1395 1396 PetscFunctionBegin; 1397 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1398 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1399 &cite1)); 1400 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1401 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1402 &cite2)); 1403 1404 if (A->factorerrortype) { 1405 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1406 PetscCall(VecSetInf(x)); 1407 PetscFunctionReturn(PETSC_SUCCESS); 1408 } 1409 1410 mumps->id.nrhs = 1; 1411 if (mumps->petsc_size > 1) { 1412 if (mumps->ICNTL20 == 10) { 1413 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1414 PetscCall(VecGetArrayRead(b, &rarray)); 1415 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1416 } else { 1417 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1418 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1419 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1420 if (!mumps->myid) { 1421 PetscCall(VecGetArray(mumps->b_seq, &array)); 1422 mumps->id.rhs = (MumpsScalar *)array; 1423 } 1424 } 1425 } else { /* petsc_size == 1 */ 1426 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1427 PetscCall(VecCopy(b, x)); 1428 PetscCall(VecGetArray(x, &array)); 1429 mumps->id.rhs = (MumpsScalar *)array; 1430 } 1431 1432 /* 1433 handle condensation step of Schur complement (if any) 1434 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1435 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1436 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1437 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1438 */ 1439 if (mumps->id.size_schur > 0) { 1440 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1441 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1442 second_solve = PETSC_TRUE; 1443 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1444 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1445 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1446 } 1447 /* solve phase */ 1448 mumps->id.job = JOB_SOLVE; 1449 PetscMUMPS_c(mumps); 1450 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1451 1452 /* handle expansion step of Schur complement (if any) */ 1453 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1454 else if (mumps->id.ICNTL(26) == 1) { 1455 PetscCall(MatMumpsSolveSchur_Private(A)); 1456 for (i = 0; i < mumps->id.size_schur; ++i) { 1457 #if !defined(PETSC_USE_COMPLEX) 1458 PetscScalar val = mumps->id.redrhs[i]; 1459 #else 1460 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1461 #endif 1462 array[mumps->id.listvar_schur[i] - 1] = val; 1463 } 1464 } 1465 1466 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1467 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1468 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1469 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1470 } 1471 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1472 PetscInt *isol2_loc 
= NULL; 1473 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1474 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1475 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1476 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1477 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1478 PetscCall(ISDestroy(&is_iden)); 1479 PetscCall(ISDestroy(&is_petsc)); 1480 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1481 } 1482 1483 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1484 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1485 } 1486 1487 if (mumps->petsc_size > 1) { 1488 if (mumps->ICNTL20 == 10) { 1489 PetscCall(VecRestoreArrayRead(b, &rarray)); 1490 } else if (!mumps->myid) { 1491 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1492 } 1493 } else PetscCall(VecRestoreArray(x, &array)); 1494 1495 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1496 PetscFunctionReturn(PETSC_SUCCESS); 1497 } 1498 1499 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1500 { 1501 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1502 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1503 1504 PetscFunctionBegin; 1505 mumps->id.ICNTL(9) = 0; 1506 PetscCall(MatSolve_MUMPS(A, b, x)); 1507 mumps->id.ICNTL(9) = value; 1508 PetscFunctionReturn(PETSC_SUCCESS); 1509 } 1510 1511 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1512 { 1513 Mat Bt = NULL; 1514 PetscBool denseX, denseB, flg, flgT; 1515 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1516 PetscInt i, nrhs, M; 1517 PetscScalar *array; 1518 const PetscScalar *rbray; 1519 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1520 PetscMUMPSInt *isol_loc, *isol_loc_save; 1521 PetscScalar *bray, *sol_loc, *sol_loc_save; 1522 IS is_to, is_from; 1523 PetscInt k, proc, j, m, myrstart; 1524 const PetscInt *rstart; 1525 Vec v_mpi, msol_loc; 1526 VecScatter scat_sol; 1527 Vec b_seq; 1528 VecScatter scat_rhs; 1529 PetscScalar *aa; 1530 PetscInt spnr, *ia, *ja; 1531 Mat_MPIAIJ *b = NULL; 1532 1533 PetscFunctionBegin; 1534 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1535 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1536 1537 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1538 if (denseB) { 1539 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1540 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1541 } else { /* sparse B */ 1542 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1543 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1544 if (flgT) { /* input B is transpose of actual RHS matrix, 1545 because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 1546 PetscCall(MatTransposeGetMat(B, &Bt)); 1547 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1548 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1549 } 1550 1551 PetscCall(MatGetSize(B, &M, &nrhs)); 1552 mumps->id.nrhs = nrhs; 1553 mumps->id.lrhs = M; 1554 mumps->id.rhs = NULL; 1555 1556 if (mumps->petsc_size == 1) { 1557 PetscScalar *aa; 1558 PetscInt spnr, *ia, *ja; 1559 PetscBool second_solve = PETSC_FALSE; 1560 1561 PetscCall(MatDenseGetArray(X, &array)); 1562 mumps->id.rhs = (MumpsScalar *)array; 1563 1564 if (denseB) { 1565 /* copy B to X */ 1566 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1567 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1568 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1569 } else { /* sparse B */ 1570 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1571 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1572 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1573 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1574 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1575 } 1576 /* handle condensation step of Schur complement (if any) */ 1577 if (mumps->id.size_schur > 0) { 1578 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1579 second_solve = PETSC_TRUE; 1580 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1581 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1582 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1583 } 1584 /* solve phase */ 1585 mumps->id.job = JOB_SOLVE; 1586 PetscMUMPS_c(mumps); 1587 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1588 1589 /* handle expansion step of Schur complement (if any) */ 1590 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1591 else if (mumps->id.ICNTL(26) == 1) { 1592 PetscCall(MatMumpsSolveSchur_Private(A)); 1593 for (j = 0; j < nrhs; ++j) 1594 for (i = 0; i < mumps->id.size_schur; ++i) { 1595 #if !defined(PETSC_USE_COMPLEX) 1596 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1597 #else 1598 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1599 #endif 1600 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1601 } 1602 } 1603 if (!denseB) { /* sparse B */ 1604 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1605 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1606 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1607 } 1608 PetscCall(MatDenseRestoreArray(X, &array)); 1609 PetscFunctionReturn(PETSC_SUCCESS); 1610 } 1611 1612 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
*/ 1613 PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1614 1615 /* create msol_loc to hold mumps local solution */ 1616 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1617 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1618 1619 lsol_loc = mumps->id.lsol_loc; 1620 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1621 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1622 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1623 mumps->id.isol_loc = isol_loc; 1624 1625 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1626 1627 if (denseB) { 1628 if (mumps->ICNTL20 == 10) { 1629 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1630 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1631 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1632 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1633 PetscCall(MatGetLocalSize(B, &m, NULL)); 1634 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1635 } else { 1636 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1637 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1638 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1639 0, re-arrange B into desired order, which is a local operation. 1640 */ 1641 1642 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1643 /* wrap dense rhs matrix B into a vector v_mpi */ 1644 PetscCall(MatGetLocalSize(B, &m, NULL)); 1645 PetscCall(MatDenseGetArray(B, &bray)); 1646 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1647 PetscCall(MatDenseRestoreArray(B, &bray)); 1648 1649 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1650 if (!mumps->myid) { 1651 PetscInt *idx; 1652 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1653 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1654 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1655 k = 0; 1656 for (proc = 0; proc < mumps->petsc_size; proc++) { 1657 for (j = 0; j < nrhs; j++) { 1658 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1659 } 1660 } 1661 1662 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1663 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1664 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1665 } else { 1666 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1667 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1668 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1669 } 1670 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1671 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1672 PetscCall(ISDestroy(&is_to)); 1673 PetscCall(ISDestroy(&is_from)); 1674 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1675 1676 if (!mumps->myid) { /* define rhs on the host */ 1677 PetscCall(VecGetArray(b_seq, &bray)); 1678 mumps->id.rhs = (MumpsScalar *)bray; 1679 PetscCall(VecRestoreArray(b_seq, &bray)); 1680 } 1681 } 1682 } else { /* sparse B */ 1683 b = (Mat_MPIAIJ *)Bt->data; 1684 1685 /* wrap dense X into a vector v_mpi */ 1686 PetscCall(MatGetLocalSize(X, &m, NULL)); 1687 PetscCall(MatDenseGetArray(X, &bray)); 1688 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1689 PetscCall(MatDenseRestoreArray(X, &bray)); 1690 1691 if (!mumps->myid) { 1692 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1693 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1694 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1695 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1696 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1697 } else { 1698 mumps->id.irhs_ptr = NULL; 1699 mumps->id.irhs_sparse = NULL; 1700 mumps->id.nz_rhs = 0; 1701 mumps->id.rhs_sparse = NULL; 1702 } 1703 } 1704 1705 /* solve phase */ 1706 mumps->id.job = JOB_SOLVE; 1707 PetscMUMPS_c(mumps); 1708 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1709 1710 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1711 PetscCall(MatDenseGetArray(X, &array)); 1712 PetscCall(VecPlaceArray(v_mpi, array)); 1713 1714 /* create scatter scat_sol */ 1715 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1716 /* iidx: index for scatter mumps solution to petsc X */ 1717 1718 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1719 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1720 for (i = 0; i < lsol_loc; i++) { 1721 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1722 1723 for (proc = 0; proc < mumps->petsc_size; proc++) { 1724 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1725 myrstart = rstart[proc]; 1726 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1727 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1728 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1729 break; 1730 } 1731 } 1732 1733 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1734 } 1735 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1736 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1737 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1738 PetscCall(ISDestroy(&is_from)); 1739 PetscCall(ISDestroy(&is_to)); 1740 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1741 PetscCall(MatDenseRestoreArray(X, &array)); 1742 1743 /* free spaces */ 1744 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1745 mumps->id.isol_loc = isol_loc_save; 1746 1747 PetscCall(PetscFree2(sol_loc, isol_loc)); 1748 PetscCall(PetscFree(idxx)); 1749 PetscCall(VecDestroy(&msol_loc)); 1750 PetscCall(VecDestroy(&v_mpi)); 1751 if (!denseB) { 1752 if (!mumps->myid) { 1753 b = (Mat_MPIAIJ *)Bt->data; 1754 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1755 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1756 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1757 } 1758 } else { 1759 if (mumps->ICNTL20 == 0) { 1760 PetscCall(VecDestroy(&b_seq)); 1761 PetscCall(VecScatterDestroy(&scat_rhs)); 1762 } 1763 } 1764 PetscCall(VecScatterDestroy(&scat_sol)); 1765 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1766 PetscFunctionReturn(PETSC_SUCCESS); 1767 } 1768 1769 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1770 { 1771 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1772 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1773 1774 PetscFunctionBegin; 1775 mumps->id.ICNTL(9) = 0; 1776 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1777 mumps->id.ICNTL(9) = value; 1778 PetscFunctionReturn(PETSC_SUCCESS); 1779 } 1780 1781 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1782 { 1783 PetscBool flg; 1784 Mat B; 1785 1786 PetscFunctionBegin; 1787 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1788 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1789 1790 /* Create B=Bt^T that uses Bt's data structure */ 1791 PetscCall(MatCreateTranspose(Bt, &B)); 1792 1793 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1794 PetscCall(MatDestroy(&B)); 1795 PetscFunctionReturn(PETSC_SUCCESS); 1796 } 1797 1798 #if !defined(PETSC_USE_COMPLEX) 1799 /* 1800 input: 1801 F: numeric factor 1802 output: 1803 nneg: total number of negative pivots 1804 nzero: total number of zero pivots 1805 npos: (global dimension of F) - nneg - nzero 1806 */ 1807 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1808 { 1809 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1810 PetscMPIInt size; 1811 1812 PetscFunctionBegin; 1813 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1814 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1815 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13)); 1816 1817 if (nneg) *nneg = mumps->id.INFOG(12); 1818 if (nzero || npos) { 1819 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1820 if (nzero) *nzero = mumps->id.INFOG(28); 1821 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1822 } 1823 PetscFunctionReturn(PETSC_SUCCESS); 1824 } 1825 #endif 1826 1827 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1828 { 1829 PetscInt i, nreqs; 1830 PetscMUMPSInt *irn, *jcn; 1831 PetscMPIInt count; 1832 PetscInt64 totnnz, remain; 1833 const PetscInt osize = mumps->omp_comm_size; 1834 PetscScalar *val; 1835 1836 PetscFunctionBegin; 1837 if (osize > 1) { 1838 if (reuse == MAT_INITIAL_MATRIX) { 1839 /* master first gathers counts of nonzeros to receive */ 1840 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1841 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1842 1843 /* Then each computes number of send/recvs */ 1844 if (mumps->is_omp_master) { 1845 /* Start from 1 since self communication is not done in MPI */ 1846 nreqs = 0; 1847 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1848 } else { 1849 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1850 } 1851 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1852 1853 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1854 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1855 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1856 is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
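Each slice is limited to PETSC_MPI_INT_MAX entries so that the MPI count argument fits in an int; the loops below simply advance the irn/jcn/val pointers by the slice length and keep posting MPI_Isend/MPI_Irecv until the whole buffer has been transferred.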
1857 */ 1858 nreqs = 0; /* counter for actual send/recvs */ 1859 if (mumps->is_omp_master) { 1860 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1861 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1862 PetscCall(PetscMalloc1(totnnz, &val)); 1863 1864 /* Self communication */ 1865 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1866 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1867 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1868 1869 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1870 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1871 PetscCall(PetscFree(mumps->val_alloc)); 1872 mumps->nnz = totnnz; 1873 mumps->irn = irn; 1874 mumps->jcn = jcn; 1875 mumps->val = mumps->val_alloc = val; 1876 1877 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1878 jcn += mumps->recvcount[0]; 1879 val += mumps->recvcount[0]; 1880 1881 /* Remote communication */ 1882 for (i = 1; i < osize; i++) { 1883 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1884 remain = mumps->recvcount[i] - count; 1885 while (count > 0) { 1886 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1887 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1888 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1889 irn += count; 1890 jcn += count; 1891 val += count; 1892 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1893 remain -= count; 1894 } 1895 } 1896 } else { 1897 irn = mumps->irn; 1898 jcn = mumps->jcn; 1899 val = mumps->val; 1900 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1901 remain = mumps->nnz - count; 1902 while (count > 0) { 1903 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1904 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1905 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1906 irn += count; 1907 jcn += count; 1908 val += count; 1909 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1910 remain -= count; 1911 } 1912 } 1913 } else { 1914 nreqs = 0; 1915 if (mumps->is_omp_master) { 1916 val = mumps->val + mumps->recvcount[0]; 1917 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1918 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1919 remain = mumps->recvcount[i] - count; 1920 while (count > 0) { 1921 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1922 val += count; 1923 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1924 remain -= count; 1925 } 1926 } 1927 } else { 1928 val = mumps->val; 1929 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1930 remain = mumps->nnz - count; 1931 while (count > 0) { 1932 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1933 val += count; 1934 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1935 remain -= count; 1936 } 1937 } 1938 } 1939 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1940 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1941 } 1942 PetscFunctionReturn(PETSC_SUCCESS); 1943 } 1944 1945 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const 
MatFactorInfo *info) 1946 { 1947 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1948 PetscBool isMPIAIJ; 1949 1950 PetscFunctionBegin; 1951 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { 1952 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1953 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1954 PetscFunctionReturn(PETSC_SUCCESS); 1955 } 1956 1957 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1958 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1959 1960 /* numerical factorization phase */ 1961 mumps->id.job = JOB_FACTNUMERIC; 1962 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1963 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 1964 } else { 1965 mumps->id.a_loc = (MumpsScalar *)mumps->val; 1966 } 1967 PetscMUMPS_c(mumps); 1968 if (mumps->id.INFOG(1) < 0) { 1969 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 1970 if (mumps->id.INFOG(1) == -10) { 1971 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1972 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1973 } else if (mumps->id.INFOG(1) == -13) { 1974 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1975 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1976 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1977 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1978 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1979 } else { 1980 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1981 F->factorerrortype = MAT_FACTOR_OTHER; 1982 } 1983 } 1984 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16)); 1985 1986 F->assembled = PETSC_TRUE; 1987 1988 if (F->schur) { /* reset Schur status to unfactored */ 1989 #if defined(PETSC_HAVE_CUDA) 1990 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1991 #endif 1992 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1993 mumps->id.ICNTL(19) = 2; 1994 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1995 } 1996 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1997 } 1998 1999 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2000 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2001 2002 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2003 if (mumps->petsc_size > 1) { 2004 PetscInt lsol_loc; 2005 PetscScalar *sol_loc; 2006 2007 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 2008 2009 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2010 if (mumps->x_seq) {
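/* a previous numeric factorization already created x_seq and its scatter; the local solution length INFO(23) may change with this factorization, so destroy them here and rebuild them below */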
2011 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2012 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2013 PetscCall(VecDestroy(&mumps->x_seq)); 2014 } 2015 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2016 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2017 mumps->id.lsol_loc = lsol_loc; 2018 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2019 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2020 } 2021 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 2022 PetscFunctionReturn(PETSC_SUCCESS); 2023 } 2024 2025 /* Sets MUMPS options from the options database */ 2026 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2027 { 2028 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2029 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2030 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2031 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2032 MumpsScalar *arr; 2033 2034 PetscFunctionBegin; 2035 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2036 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2037 PetscInt nthreads = 0; 2038 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2039 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2040 2041 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2042 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2043 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2044 2045 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2046 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2047 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2048 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2049 if (mumps->use_petsc_omp_support) { 2050 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2051 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2052 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2053 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2054 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2055 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2056 #endif 2057 } else { 2058 mumps->omp_comm = PETSC_COMM_SELF; 2059 mumps->mumps_comm = mumps->petsc_comm; 2060 mumps->is_omp_master = PETSC_TRUE; 2061 } 2062 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2063 mumps->reqs = NULL; 2064 mumps->tag = 0; 2065 2066 if (mumps->mumps_comm != MPI_COMM_NULL) { 2067 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2068 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
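With PETSc OpenMP support the communicator comes from PetscOmpCtrlGetOmpComms() above; in the plain MPI case the else branch below instead takes a communicator from PETSc's internal pool via PetscCommGetComm().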
*/ 2069 MPI_Comm comm; 2070 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2071 mumps->mumps_comm = comm; 2072 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2073 } 2074 2075 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2076 mumps->id.job = JOB_INIT; 2077 mumps->id.par = 1; /* host participates in factorization and solve */ 2078 mumps->id.sym = mumps->sym; 2079 2080 size = mumps->id.size_schur; 2081 arr = mumps->id.schur; 2082 listvar_schur = mumps->id.listvar_schur; 2083 PetscMUMPS_c(mumps); 2084 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2085 /* restore cached ICNTL and CNTL values */ 2086 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2087 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2088 PetscCall(PetscFree(mumps->ICNTL_pre)); 2089 PetscCall(PetscFree(mumps->CNTL_pre)); 2090 2091 if (schur) { 2092 mumps->id.size_schur = size; 2093 mumps->id.schur_lld = size; 2094 mumps->id.schur = arr; 2095 mumps->id.listvar_schur = listvar_schur; 2096 if (mumps->petsc_size > 1) { 2097 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2098 2099 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2100 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2101 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2102 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2103 } else { 2104 if (F->factortype == MAT_FACTOR_LU) { 2105 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2106 } else { 2107 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2108 } 2109 } 2110 mumps->id.ICNTL(26) = -1; 2111 } 2112 2113 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2114 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2115 */ 2116 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2117 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2118 2119 mumps->scat_rhs = NULL; 2120 mumps->scat_sol = NULL; 2121 2122 /* set PETSc-MUMPS default options - override MUMPS default */ 2123 mumps->id.ICNTL(3) = 0; 2124 mumps->id.ICNTL(4) = 0; 2125 if (mumps->petsc_size == 1) { 2126 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2127 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2128 } else { 2129 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2130 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2131 } 2132 } 2133 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2134 if (flg) mumps->id.ICNTL(1) = icntl; 2135 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2136 if (flg) mumps->id.ICNTL(2) = icntl; 2137 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2138 if (flg) mumps->id.ICNTL(3) = icntl; 2139 2140 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2141 if (flg) mumps->id.ICNTL(4) = icntl; 2142 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2143 2144 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2145 if (flg) mumps->id.ICNTL(6) = icntl; 2146 2147 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2148 if (flg) { 2149 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2150 mumps->id.ICNTL(7) = icntl; 2151 } 2152 2153 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2154 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2155 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2156 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2157 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2158 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2159 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2160 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2161 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2162 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2163 if (flg) { 2164 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2165 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2166 } 2167 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2168 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2169 PetscCall(MatDestroy(&F->schur)); 2170 PetscCall(MatMumpsResetSchur_Private(mumps)); 2171 } 2172 2173 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2174 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2175 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2176 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2177 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2178 In short, we could not use distributed RHS with MPICH until v4.0b1. 
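The preprocessor test below therefore defaults ICNTL(20) to 0 (centralized dense RHS) for MUMPS older than 5.3.0 or an affected MPICH, and to 10 (distributed dense RHS) otherwise; either default can still be overridden with -mat_mumps_icntl_20.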
2179 */ 2180 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2181 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 2182 #else 2183 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 2184 #endif 2185 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2186 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2187 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2188 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2189 #endif 2190 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2191 2192 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2193 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2194 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2195 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2196 2197 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2198 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2199 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2200 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2201 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2202 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2203 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2204 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2205 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", 
mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2206 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2207 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2208 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2209 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2210 2211 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2212 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2213 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2214 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2215 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2216 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2217 2218 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2219 2220 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2221 if (ninfo) { 2222 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo); 2223 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2224 mumps->ninfo = ninfo; 2225 for (i = 0; i < ninfo; i++) { 2226 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]); 2227 mumps->info[i] = info[i]; 2228 } 2229 } 2230 PetscOptionsEnd(); 2231 PetscFunctionReturn(PETSC_SUCCESS); 2232 } 2233 2234 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2235 { 2236 PetscFunctionBegin; 2237 if (mumps->id.INFOG(1) < 0) { 2238 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2239 if (mumps->id.INFOG(1) == -6) { 2240 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2241 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2242 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2243 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2244 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2245 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { 2246 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2247 } else { 2248 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d,
INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2249 F->factorerrortype = MAT_FACTOR_OTHER; 2250 } 2251 } 2252 PetscFunctionReturn(PETSC_SUCCESS); 2253 } 2254 2255 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2256 { 2257 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2258 Vec b; 2259 const PetscInt M = A->rmap->N; 2260 2261 PetscFunctionBegin; 2262 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2263 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2264 PetscFunctionReturn(PETSC_SUCCESS); 2265 } 2266 2267 /* Set MUMPS options from the options database */ 2268 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2269 2270 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2271 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2272 2273 /* analysis phase */ 2274 mumps->id.job = JOB_FACTSYMBOLIC; 2275 mumps->id.n = M; 2276 switch (mumps->id.ICNTL(18)) { 2277 case 0: /* centralized assembled matrix input */ 2278 if (!mumps->myid) { 2279 mumps->id.nnz = mumps->nnz; 2280 mumps->id.irn = mumps->irn; 2281 mumps->id.jcn = mumps->jcn; 2282 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2283 if (r) { 2284 mumps->id.ICNTL(7) = 1; 2285 if (!mumps->myid) { 2286 const PetscInt *idx; 2287 PetscInt i; 2288 2289 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2290 PetscCall(ISGetIndices(r, &idx)); 2291 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2292 PetscCall(ISRestoreIndices(r, &idx)); 2293 } 2294 } 2295 } 2296 break; 2297 case 3: /* distributed assembled matrix input (size>1) */ 2298 mumps->id.nnz_loc = mumps->nnz; 2299 mumps->id.irn_loc = mumps->irn; 2300 mumps->id.jcn_loc = mumps->jcn; 2301 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2302 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2303 PetscCall(MatCreateVecs(A, NULL, &b)); 2304 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2305 PetscCall(VecDestroy(&b)); 2306 } 2307 break; 2308 } 2309 PetscMUMPS_c(mumps); 2310 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2311 2312 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2313 F->ops->solve = MatSolve_MUMPS; 2314 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2315 F->ops->matsolve = MatMatSolve_MUMPS; 2316 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2317 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2318 2319 mumps->matstruc = SAME_NONZERO_PATTERN; 2320 PetscFunctionReturn(PETSC_SUCCESS); 2321 } 2322 2323 /* Note the Petsc r and c permutations are ignored */ 2324 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2325 { 2326 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2327 Vec b; 2328 const PetscInt M = A->rmap->N; 2329 2330 PetscFunctionBegin; 2331 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2332 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2333 PetscFunctionReturn(PETSC_SUCCESS); 2334 } 2335 2336 /* Set MUMPS options from the options database */ 2337 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2338 2339 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2340 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2341 2342 /* analysis phase */ 2343 mumps->id.job = JOB_FACTSYMBOLIC; 2344 mumps->id.n = M; 2345 switch (mumps->id.ICNTL(18)) { 2346 case 0: /* centralized assembled matrix input */ 2347 if (!mumps->myid) { 2348 mumps->id.nnz = mumps->nnz; 2349 mumps->id.irn = mumps->irn; 2350 mumps->id.jcn = mumps->jcn; 2351 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2352 } 2353 break; 2354 case 3: /* distributed assembled matrix input (size>1) */ 2355 mumps->id.nnz_loc = mumps->nnz; 2356 mumps->id.irn_loc = mumps->irn; 2357 mumps->id.jcn_loc = mumps->jcn; 2358 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2359 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2360 PetscCall(MatCreateVecs(A, NULL, &b)); 2361 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2362 PetscCall(VecDestroy(&b)); 2363 } 2364 break; 2365 } 2366 PetscMUMPS_c(mumps); 2367 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2368 2369 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2370 F->ops->solve = MatSolve_MUMPS; 2371 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2372 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2373 2374 mumps->matstruc = SAME_NONZERO_PATTERN; 2375 PetscFunctionReturn(PETSC_SUCCESS); 2376 } 2377 2378 /* Note the Petsc r permutation and factor info are ignored */ 2379 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2380 { 2381 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2382 Vec b; 2383 const PetscInt M = A->rmap->N; 2384 2385 PetscFunctionBegin; 2386 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2387 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2388 PetscFunctionReturn(PETSC_SUCCESS); 2389 } 2390 2391 /* Set MUMPS options from the options database */ 2392 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2393 2394 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2395 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2396 2397 /* analysis phase */ 2398 mumps->id.job = JOB_FACTSYMBOLIC; 2399 mumps->id.n = M; 2400 switch (mumps->id.ICNTL(18)) { 2401 case 0: /* centralized assembled matrix input */ 2402 if (!mumps->myid) { 2403 mumps->id.nnz = mumps->nnz; 2404 mumps->id.irn = mumps->irn; 2405 mumps->id.jcn = mumps->jcn; 2406 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2407 } 2408 break; 2409 case 3: /* distributed assembled matrix input (size>1) */ 2410 mumps->id.nnz_loc = mumps->nnz; 2411 mumps->id.irn_loc = mumps->irn; 2412 mumps->id.jcn_loc = mumps->jcn; 2413 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2414 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2415 PetscCall(MatCreateVecs(A, NULL, &b)); 2416 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2417 PetscCall(VecDestroy(&b)); 2418 } 2419 break; 2420 } 2421 PetscMUMPS_c(mumps); 2422 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2423 2424 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2425 F->ops->solve = MatSolve_MUMPS; 2426 F->ops->solvetranspose = MatSolve_MUMPS; 2427 F->ops->matsolve = MatMatSolve_MUMPS; 2428 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2429 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2430 #if defined(PETSC_USE_COMPLEX) 2431 F->ops->getinertia = NULL; 2432 #else 2433 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2434 #endif 2435 2436 mumps->matstruc = SAME_NONZERO_PATTERN; 2437 PetscFunctionReturn(PETSC_SUCCESS); 2438 } 2439 2440 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2441 { 2442 PetscBool iascii; 2443 PetscViewerFormat format; 2444 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2445 2446 PetscFunctionBegin; 2447 /* check if matrix is mumps type */ 2448 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2449 2450 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2451 if (iascii) { 2452 PetscCall(PetscViewerGetFormat(viewer, &format)); 2453 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2454 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2455 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2456 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2457 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2458 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2459 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2460 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2461 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2462 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2463 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2464 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2465 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2466 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2467 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2468 if (mumps->id.ICNTL(11) > 0) { 2469 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2470 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2471 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2472 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2473 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2474 
PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2475 } 2476 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2477 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2478 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2479 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2480 /* ICNTL(15-17) not used */ 2481 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2482 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2483 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2484 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2485 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2486 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2487 2488 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2489 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2490 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2491 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2492 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2493 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2494 2495 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2496 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2497 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2498 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2499 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2500 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2501 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2502 2503 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2504 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2505 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2506 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2507 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", 
mumps->id.CNTL(5))); 2508 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2509 2510 /* information local to each processor */ 2511 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2512 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2513 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2514 PetscCall(PetscViewerFlush(viewer)); 2515 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2516 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2517 PetscCall(PetscViewerFlush(viewer)); 2518 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2519 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2520 PetscCall(PetscViewerFlush(viewer)); 2521 2522 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2523 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2524 PetscCall(PetscViewerFlush(viewer)); 2525 2526 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2527 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2528 PetscCall(PetscViewerFlush(viewer)); 2529 2530 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2531 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2532 PetscCall(PetscViewerFlush(viewer)); 2533 2534 if (mumps->ninfo && mumps->ninfo <= 80) { 2535 PetscInt i; 2536 for (i = 0; i < mumps->ninfo; i++) { 2537 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2538 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2539 PetscCall(PetscViewerFlush(viewer)); 2540 } 2541 } 2542 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2543 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2544 2545 if (mumps->myid == 0) { /* information from the host */ 2546 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2547 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2548 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2549 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2550 2551 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2552 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2553 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2554 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2555 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2556 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2557 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2558 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2559 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2560 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2561 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2562 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14))); 2563 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2564 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2565 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2566 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2567 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2568 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2569 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2570 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2571 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2572 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2573 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2574 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2575 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2576 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2577 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2578 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2579 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2580 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2581 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2582 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2583 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2584 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2585 } 2586 } 2587 } 2588 PetscFunctionReturn(PETSC_SUCCESS); 2589 } 2590 2591 static PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2592 { 2593 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2594 2595 PetscFunctionBegin; 2596 info->block_size = 1.0; 2597 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2598 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2599 info->nz_unneeded = 0.0; 2600 info->assemblies = 0.0; 2601 info->mallocs = 0.0; 2602 info->memory = 0.0; 2603 info->fill_ratio_given = 0; 2604 info->fill_ratio_needed = 0; 2605 info->factor_mallocs = 0; 2606 PetscFunctionReturn(PETSC_SUCCESS); 2607 } 2608 2609 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2610 { 2611 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2612 const PetscScalar *arr; 2613 const PetscInt *idxs; 2614 PetscInt size, i; 2615 2616 PetscFunctionBegin; 2617 PetscCall(ISGetLocalSize(is, &size)); 2618 /* Schur complement matrix */ 2619 PetscCall(MatDestroy(&F->schur)); 2620 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2621 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2622 mumps->id.schur = (MumpsScalar *)arr; 2623 mumps->id.size_schur = size; 2624 mumps->id.schur_lld = size; 2625 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2626 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2627 2628 /* MUMPS expects Fortran style indices */ 2629 PetscCall(PetscFree(mumps->id.listvar_schur)); 2630 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2631 PetscCall(ISGetIndices(is, &idxs)); 2632 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2633 PetscCall(ISRestoreIndices(is, &idxs)); 2634 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 2635 mumps->id.ICNTL(26) = -1; 2636 PetscFunctionReturn(PETSC_SUCCESS); 2637 } 2638 2639 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2640 { 2641 Mat St; 2642 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2643 PetscScalar *array; 2644 2645 PetscFunctionBegin; 2646 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected!
Call MatFactorSetSchurIS() to enable it"); 2647 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2648 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2649 PetscCall(MatSetType(St, MATDENSE)); 2650 PetscCall(MatSetUp(St)); 2651 PetscCall(MatDenseGetArray(St, &array)); 2652 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2653 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2654 PetscInt i, j, N = mumps->id.size_schur; 2655 for (i = 0; i < N; i++) { 2656 for (j = 0; j < N; j++) { 2657 #if !defined(PETSC_USE_COMPLEX) 2658 PetscScalar val = mumps->id.schur[i * N + j]; 2659 #else 2660 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2661 #endif 2662 array[j * N + i] = val; 2663 } 2664 } 2665 } else { /* stored by columns */ 2666 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2667 } 2668 } else { /* either full or lower-triangular (not packed) */ 2669 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2670 PetscInt i, j, N = mumps->id.size_schur; 2671 for (i = 0; i < N; i++) { 2672 for (j = i; j < N; j++) { 2673 #if !defined(PETSC_USE_COMPLEX) 2674 PetscScalar val = mumps->id.schur[i * N + j]; 2675 #else 2676 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2677 #endif 2678 array[i * N + j] = array[j * N + i] = val; 2679 } 2680 } 2681 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2682 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2683 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2684 PetscInt i, j, N = mumps->id.size_schur; 2685 for (i = 0; i < N; i++) { 2686 for (j = 0; j < i + 1; j++) { 2687 #if !defined(PETSC_USE_COMPLEX) 2688 PetscScalar val = mumps->id.schur[i * N + j]; 2689 #else 2690 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2691 #endif 2692 array[i * N + j] = array[j * N + i] = val; 2693 } 2694 } 2695 } 2696 } 2697 PetscCall(MatDenseRestoreArray(St, &array)); 2698 *S = St; 2699 PetscFunctionReturn(PETSC_SUCCESS); 2700 } 2701 2702 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2703 { 2704 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2705 2706 PetscFunctionBegin; 2707 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2708 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2709 for (i = 0; i < nICNTL_pre; ++i) 2710 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2711 if (i == nICNTL_pre) { /* not already cached */ 2712 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2713 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2714 mumps->ICNTL_pre[0]++; 2715 } 2716 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2717 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2718 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2719 PetscFunctionReturn(PETSC_SUCCESS); 2720 } 2721 2722 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2723 { 2724 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2725 2726 PetscFunctionBegin; 2727 if (mumps->id.job == JOB_NULL) { 2728 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 2729 *ival = 0; 2730 for (i = 0; i < nICNTL_pre; ++i) { 2731 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2732 } 2733 } else *ival = mumps->id.ICNTL(icntl); 2734 PetscFunctionReturn(PETSC_SUCCESS); 2735 } 2736 2737 /*@ 2738 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2739 2740 Logically Collective 2741 2742 Input Parameters: 2743 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2744 . icntl - index of MUMPS parameter array ICNTL() 2745 - ival - value of MUMPS ICNTL(icntl) 2746 2747 Options Database Key: 2748 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2749 2750 Level: beginner 2751 2752 References: 2753 . * - MUMPS Users' Guide 2754 2755 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2756 @*/ 2757 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2758 { 2759 PetscFunctionBegin; 2760 PetscValidType(F, 1); 2761 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2762 PetscValidLogicalCollectiveInt(F, icntl, 2); 2763 PetscValidLogicalCollectiveInt(F, ival, 3); 2764 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2765 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2766 PetscFunctionReturn(PETSC_SUCCESS); 2767 } 2768 2769 /*@ 2770 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2771 2772 Logically Collective 2773 2774 Input Parameters: 2775 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2776 - icntl - index of MUMPS parameter array ICNTL() 2777 2778 Output Parameter: 2779 . ival - value of MUMPS ICNTL(icntl) 2780 2781 Level: beginner 2782 2783 References: 2784 . * - MUMPS Users' Guide 2785 2786 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2787 @*/ 2788 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2789 { 2790 PetscFunctionBegin; 2791 PetscValidType(F, 1); 2792 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2793 PetscValidLogicalCollectiveInt(F, icntl, 2); 2794 PetscAssertPointer(ival, 3); 2795 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2796 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2797 PetscFunctionReturn(PETSC_SUCCESS); 2798 } 2799 2800 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2801 { 2802 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2803 2804 PetscFunctionBegin; 2805 if (mumps->id.job == JOB_NULL) { 2806 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 2807 for (i = 0; i < nCNTL_pre; ++i) 2808 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2809 if (i == nCNTL_pre) { 2810 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2811 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2812 mumps->CNTL_pre[0]++; 2813 } 2814 mumps->CNTL_pre[1 + 2 * i] = icntl; 2815 mumps->CNTL_pre[2 + 2 * i] = val; 2816 } else mumps->id.CNTL(icntl) = val; 2817 PetscFunctionReturn(PETSC_SUCCESS); 2818 } 2819 2820 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2821 { 2822 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2823 2824 PetscFunctionBegin; 2825 if (mumps->id.job == JOB_NULL) { 2826 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2827 *val = 0.0; 2828 for (i = 0; i < nCNTL_pre; ++i) { 2829 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2830 } 2831 } else *val = mumps->id.CNTL(icntl); 2832 PetscFunctionReturn(PETSC_SUCCESS); 2833 } 2834 2835 /*@ 2836 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2837 2838 Logically Collective 2839 2840 Input Parameters: 2841 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2842 . icntl - index of MUMPS parameter array CNTL() 2843 - val - value of MUMPS CNTL(icntl) 2844 2845 Options Database Key: 2846 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2847 2848 Level: beginner 2849 2850 References: 2851 . * - MUMPS Users' Guide 2852 2853 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2854 @*/ 2855 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2856 { 2857 PetscFunctionBegin; 2858 PetscValidType(F, 1); 2859 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2860 PetscValidLogicalCollectiveInt(F, icntl, 2); 2861 PetscValidLogicalCollectiveReal(F, val, 3); 2862 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2863 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2864 PetscFunctionReturn(PETSC_SUCCESS); 2865 } 2866 2867 /*@ 2868 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2869 2870 Logically Collective 2871 2872 Input Parameters: 2873 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2874 - icntl - index of MUMPS parameter array CNTL() 2875 2876 Output Parameter: 2877 . val - value of MUMPS CNTL(icntl) 2878 2879 Level: beginner 2880 2881 References: 2882 . 
* - MUMPS Users' Guide 2883 2884 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2885 @*/ 2886 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2887 { 2888 PetscFunctionBegin; 2889 PetscValidType(F, 1); 2890 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2891 PetscValidLogicalCollectiveInt(F, icntl, 2); 2892 PetscAssertPointer(val, 3); 2893 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2894 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2895 PetscFunctionReturn(PETSC_SUCCESS); 2896 } 2897 2898 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2899 { 2900 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2901 2902 PetscFunctionBegin; 2903 *info = mumps->id.INFO(icntl); 2904 PetscFunctionReturn(PETSC_SUCCESS); 2905 } 2906 2907 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2908 { 2909 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2910 2911 PetscFunctionBegin; 2912 *infog = mumps->id.INFOG(icntl); 2913 PetscFunctionReturn(PETSC_SUCCESS); 2914 } 2915 2916 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2917 { 2918 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2919 2920 PetscFunctionBegin; 2921 *rinfo = mumps->id.RINFO(icntl); 2922 PetscFunctionReturn(PETSC_SUCCESS); 2923 } 2924 2925 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2926 { 2927 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2928 2929 PetscFunctionBegin; 2930 *rinfog = mumps->id.RINFOG(icntl); 2931 PetscFunctionReturn(PETSC_SUCCESS); 2932 } 2933 2934 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2935 { 2936 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2937 2938 PetscFunctionBegin; 2939 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2940 *size = 0; 2941 *array = NULL; 2942 if (!mumps->myid) { 2943 *size = mumps->id.INFOG(28); 2944 PetscCall(PetscMalloc1(*size, array)); 2945 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2946 } 2947 PetscFunctionReturn(PETSC_SUCCESS); 2948 } 2949 2950 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2951 { 2952 Mat Bt = NULL, Btseq = NULL; 2953 PetscBool flg; 2954 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2955 PetscScalar *aa; 2956 PetscInt spnr, *ia, *ja, M, nrhs; 2957 2958 PetscFunctionBegin; 2959 PetscAssertPointer(spRHS, 2); 2960 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2961 if (flg) { 2962 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2963 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2964 2965 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2966 2967 if (mumps->petsc_size > 1) { 2968 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2969 Btseq = b->A; 2970 } else { 2971 Btseq = Bt; 2972 } 2973 2974 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2975 mumps->id.nrhs = nrhs; 2976 mumps->id.lrhs = M; 2977 mumps->id.rhs = NULL; 2978 2979 if (!mumps->myid) { 2980 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2981 
PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2982 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2983 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2984 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2985 } else { 2986 mumps->id.irhs_ptr = NULL; 2987 mumps->id.irhs_sparse = NULL; 2988 mumps->id.nz_rhs = 0; 2989 mumps->id.rhs_sparse = NULL; 2990 } 2991 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2992 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2993 2994 /* solve phase */ 2995 mumps->id.job = JOB_SOLVE; 2996 PetscMUMPS_c(mumps); 2997 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2998 2999 if (!mumps->myid) { 3000 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3001 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3002 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3003 } 3004 PetscFunctionReturn(PETSC_SUCCESS); 3005 } 3006 3007 /*@ 3008 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 3009 3010 Logically Collective 3011 3012 Input Parameter: 3013 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3014 3015 Output Parameter: 3016 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3017 3018 Level: beginner 3019 3020 References: 3021 . * - MUMPS Users' Guide 3022 3023 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3024 @*/ 3025 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3026 { 3027 PetscFunctionBegin; 3028 PetscValidType(F, 1); 3029 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3030 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3031 PetscFunctionReturn(PETSC_SUCCESS); 3032 } 3033 3034 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3035 { 3036 Mat spRHS; 3037 3038 PetscFunctionBegin; 3039 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3040 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3041 PetscCall(MatDestroy(&spRHS)); 3042 PetscFunctionReturn(PETSC_SUCCESS); 3043 } 3044 3045 /*@ 3046 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 3047 3048 Logically Collective 3049 3050 Input Parameter: 3051 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3052 3053 Output Parameter: 3054 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3055 3056 Level: beginner 3057 3058 References: 3059 . 
* - MUMPS Users' Guide 3060 3061 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3062 @*/ 3063 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3064 { 3065 PetscBool flg; 3066 3067 PetscFunctionBegin; 3068 PetscValidType(F, 1); 3069 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3070 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3071 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3072 3073 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3074 PetscFunctionReturn(PETSC_SUCCESS); 3075 } 3076 3077 /*@ 3078 MatMumpsGetInfo - Get MUMPS parameter INFO() 3079 3080 Logically Collective 3081 3082 Input Parameters: 3083 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3084 - icntl - index of MUMPS parameter array INFO() 3085 3086 Output Parameter: 3087 . ival - value of MUMPS INFO(icntl) 3088 3089 Level: beginner 3090 3091 References: 3092 . * - MUMPS Users' Guide 3093 3094 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3095 @*/ 3096 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3097 { 3098 PetscFunctionBegin; 3099 PetscValidType(F, 1); 3100 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3101 PetscAssertPointer(ival, 3); 3102 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3103 PetscFunctionReturn(PETSC_SUCCESS); 3104 } 3105 3106 /*@ 3107 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3108 3109 Logically Collective 3110 3111 Input Parameters: 3112 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3113 - icntl - index of MUMPS parameter array INFOG() 3114 3115 Output Parameter: 3116 . ival - value of MUMPS INFOG(icntl) 3117 3118 Level: beginner 3119 3120 References: 3121 . * - MUMPS Users' Guide 3122 3123 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3124 @*/ 3125 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3126 { 3127 PetscFunctionBegin; 3128 PetscValidType(F, 1); 3129 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3130 PetscAssertPointer(ival, 3); 3131 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3132 PetscFunctionReturn(PETSC_SUCCESS); 3133 } 3134 3135 /*@ 3136 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3137 3138 Logically Collective 3139 3140 Input Parameters: 3141 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3142 - icntl - index of MUMPS parameter array RINFO() 3143 3144 Output Parameter: 3145 . val - value of MUMPS RINFO(icntl) 3146 3147 Level: beginner 3148 3149 References: 3150 . 
* - MUMPS Users' Guide 3151 3152 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3153 @*/ 3154 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3155 { 3156 PetscFunctionBegin; 3157 PetscValidType(F, 1); 3158 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3159 PetscAssertPointer(val, 3); 3160 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3161 PetscFunctionReturn(PETSC_SUCCESS); 3162 } 3163 3164 /*@ 3165 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3166 3167 Logically Collective 3168 3169 Input Parameters: 3170 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3171 - icntl - index of MUMPS parameter array RINFOG() 3172 3173 Output Parameter: 3174 . val - value of MUMPS RINFOG(icntl) 3175 3176 Level: beginner 3177 3178 References: 3179 . * - MUMPS Users' Guide 3180 3181 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3182 @*/ 3183 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3184 { 3185 PetscFunctionBegin; 3186 PetscValidType(F, 1); 3187 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3188 PetscAssertPointer(val, 3); 3189 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3190 PetscFunctionReturn(PETSC_SUCCESS); 3191 } 3192 3193 /*@ 3194 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3195 3196 Logically Collective 3197 3198 Input Parameter: 3199 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3200 3201 Output Parameters: 3202 + size - local size of the array. The size of the array is non-zero only on the host. 3203 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3204 for freeing this array. 3205 3206 Level: beginner 3207 3208 References: 3209 . * - MUMPS Users' Guide 3210 3211 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3212 @*/ 3213 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3214 { 3215 PetscFunctionBegin; 3216 PetscValidType(F, 1); 3217 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3218 PetscAssertPointer(size, 2); 3219 PetscAssertPointer(array, 3); 3220 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3221 PetscFunctionReturn(PETSC_SUCCESS); 3222 } 3223 3224 /*MC 3225 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3226 distributed and sequential matrices via the external package MUMPS. 
3227 3228 Works with `MATAIJ` and `MATSBAIJ` matrices 3229 3230 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3231 3232 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3233 See details below. 3234 3235 Use `-pc_type cholesky` or `-pc_type lu` together with `-pc_factor_mat_solver_type mumps` to use this direct solver 3236 3237 Options Database Keys: 3238 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3239 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 3240 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3241 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3242 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 3243 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3244 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3245 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3246 . -mat_mumps_icntl_10 - ICNTL(10): max number of iterative refinements 3247 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3248 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3249 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3250 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3251 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3252 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3253 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3254 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3255 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor 3256 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3257 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3258 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 3259 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 3260 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3261 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3262 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3263 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3264 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3265 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3266 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3267 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3268 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3269 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3270 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3271 .
-mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3272 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3273 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3274 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3275 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3276 3277 Level: beginner 3278 3279 Notes: 3280 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 3281 error if the matrix is Hermitian. 3282 3283 When used within a `KSP`/`PC` solve, the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3284 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3285 3286 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3287 the failure with 3288 .vb 3289 KSPGetPC(ksp,&pc); 3290 PCFactorGetMatrix(pc,&mat); 3291 MatMumpsGetInfo(mat,....); 3292 MatMumpsGetInfog(mat,....); etc. 3293 .ve 3294 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3295 3296 MUMPS provides 64-bit integer support in two build modes: 3297 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3298 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3299 3300 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3301 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3302 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3303 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3304 3305 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3306 3307 There are two modes to run MUMPS/PETSc with OpenMP: 3308 .vb 3309 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3310 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3311 .ve 3312 3313 .vb 3314 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3315 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3316 .ve 3317 3318 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3319 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3320 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available).
Since MUMPS calls BLAS 3321 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3322 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 3323 3324 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3325 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3326 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3327 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set 3328 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3329 In a multi-socket compute node, MPI rank mapping is an issue. Using the above example, suppose your compute node has two sockets. 3330 If you interleave MPI ranks on the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind 3331 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of its cores in socket 0 and half 3332 in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3333 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3334 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3335 examine the mapping result. 3336 3337 PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3338 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3339 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3340 3341 References: 3342 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3343 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
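   Example usage (a minimal sketch): besides the options database keys above, MUMPS options can be set programmatically by obtaining the factor matrix from the `PC` before the solve and calling `MatMumpsSetIcntl()`/`MatMumpsSetCntl()` on it. The ICNTL/CNTL values below are illustrative only, and `A`, `b`, `x` are assumed to be a previously assembled `Mat` and previously created `Vec`s.
.vb
   Mat A, F;
   Vec b, x;
   KSP ksp;
   PC  pc;
   KSPCreate(PETSC_COMM_WORLD, &ksp);
   KSPSetOperators(ksp, A, A);
   KSPSetType(ksp, KSPPREONLY);
   KSPGetPC(ksp, &pc);
   PCSetType(pc, PCLU);
   PCFactorSetMatSolverType(pc, MATSOLVERMUMPS);
   PCFactorSetUpMatSolverType(pc);  /* create the factor matrix F so MUMPS options can be set on it */
   PCFactorGetMatrix(pc, &F);
   MatMumpsSetIcntl(F, 24, 1);      /* for example, enable null pivot detection */
   MatMumpsSetCntl(F, 3, 1.e-12);   /* for example, absolute pivoting threshold */
   KSPSolve(ksp, b, x);
.ve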
3344 3345 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3346 M*/ 3347 3348 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3349 { 3350 PetscFunctionBegin; 3351 *type = MATSOLVERMUMPS; 3352 PetscFunctionReturn(PETSC_SUCCESS); 3353 } 3354 3355 /* MatGetFactor for Seq and MPI AIJ matrices */ 3356 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3357 { 3358 Mat B; 3359 Mat_MUMPS *mumps; 3360 PetscBool isSeqAIJ, isDiag; 3361 PetscMPIInt size; 3362 3363 PetscFunctionBegin; 3364 #if defined(PETSC_USE_COMPLEX) 3365 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3366 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3367 *F = NULL; 3368 PetscFunctionReturn(PETSC_SUCCESS); 3369 } 3370 #endif 3371 /* Create the factorization matrix */ 3372 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3373 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3374 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3375 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3376 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3377 PetscCall(MatSetUp(B)); 3378 3379 PetscCall(PetscNew(&mumps)); 3380 3381 B->ops->view = MatView_MUMPS; 3382 B->ops->getinfo = MatGetInfo_MUMPS; 3383 3384 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3385 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3386 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3387 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3388 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3389 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3390 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3391 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3392 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3393 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3394 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3395 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3396 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3397 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3398 3399 if (ftype == MAT_FACTOR_LU) { 3400 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3401 B->factortype = MAT_FACTOR_LU; 3402 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3403 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 
3404 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3405 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3406 mumps->sym = 0; 3407 } else { 3408 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3409 B->factortype = MAT_FACTOR_CHOLESKY; 3410 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3411 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3412 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3413 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3414 #if defined(PETSC_USE_COMPLEX) 3415 mumps->sym = 2; 3416 #else 3417 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3418 else mumps->sym = 2; 3419 #endif 3420 } 3421 3422 /* set solvertype */ 3423 PetscCall(PetscFree(B->solvertype)); 3424 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3425 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3426 if (size == 1) { 3427 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3428 B->canuseordering = PETSC_TRUE; 3429 } 3430 B->ops->destroy = MatDestroy_MUMPS; 3431 B->data = (void *)mumps; 3432 3433 *F = B; 3434 mumps->id.job = JOB_NULL; 3435 mumps->ICNTL_pre = NULL; 3436 mumps->CNTL_pre = NULL; 3437 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3438 PetscFunctionReturn(PETSC_SUCCESS); 3439 } 3440 3441 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3442 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3443 { 3444 Mat B; 3445 Mat_MUMPS *mumps; 3446 PetscBool isSeqSBAIJ; 3447 PetscMPIInt size; 3448 3449 PetscFunctionBegin; 3450 #if defined(PETSC_USE_COMPLEX) 3451 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3452 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3453 *F = NULL; 3454 PetscFunctionReturn(PETSC_SUCCESS); 3455 } 3456 #endif 3457 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3458 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3459 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3460 PetscCall(MatSetUp(B)); 3461 3462 PetscCall(PetscNew(&mumps)); 3463 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3464 if (isSeqSBAIJ) { 3465 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3466 } else { 3467 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3468 } 3469 3470 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3471 B->ops->view = MatView_MUMPS; 3472 B->ops->getinfo = MatGetInfo_MUMPS; 3473 3474 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3475 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3476 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3477 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3478 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3479 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3480 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3481 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3482 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3483 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3484 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3485 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3486 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3487 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3488 3489 B->factortype = MAT_FACTOR_CHOLESKY; 3490 #if defined(PETSC_USE_COMPLEX) 3491 mumps->sym = 2; 3492 #else 3493 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3494 else mumps->sym = 2; 3495 #endif 3496 3497 /* set solvertype */ 3498 PetscCall(PetscFree(B->solvertype)); 3499 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3500 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3501 if (size == 1) { 3502 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3503 B->canuseordering = PETSC_TRUE; 3504 } 3505 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3506 B->ops->destroy = MatDestroy_MUMPS; 3507 B->data = (void *)mumps; 3508 3509 *F = B; 3510 mumps->id.job = JOB_NULL; 3511 mumps->ICNTL_pre = NULL; 3512 mumps->CNTL_pre = NULL; 3513 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3514 PetscFunctionReturn(PETSC_SUCCESS); 3515 } 3516 3517 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3518 { 3519 Mat B; 3520 Mat_MUMPS *mumps; 3521 PetscBool isSeqBAIJ; 3522 PetscMPIInt size; 
3523 3524 PetscFunctionBegin; 3525 /* Create the factorization matrix */ 3526 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3527 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3528 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3529 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3530 PetscCall(MatSetUp(B)); 3531 3532 PetscCall(PetscNew(&mumps)); 3533 if (ftype == MAT_FACTOR_LU) { 3534 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3535 B->factortype = MAT_FACTOR_LU; 3536 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3537 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3538 mumps->sym = 0; 3539 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3540 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3541 3542 B->ops->view = MatView_MUMPS; 3543 B->ops->getinfo = MatGetInfo_MUMPS; 3544 3545 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3546 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3547 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3548 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3549 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3550 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3551 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3552 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3553 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3554 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3555 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3556 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3557 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3558 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3559 3560 /* set solvertype */ 3561 PetscCall(PetscFree(B->solvertype)); 3562 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3563 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3564 if (size == 1) { 3565 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3566 B->canuseordering = PETSC_TRUE; 3567 } 3568 B->ops->destroy = MatDestroy_MUMPS; 3569 B->data = (void *)mumps; 3570 3571 *F = B; 3572 mumps->id.job = JOB_NULL; 3573 mumps->ICNTL_pre = NULL; 3574 mumps->CNTL_pre = NULL; 3575 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3576 PetscFunctionReturn(PETSC_SUCCESS); 3577 } 3578 3579 /* MatGetFactor for Seq and MPI SELL matrices */ 3580 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3581 { 3582 Mat B; 3583 Mat_MUMPS *mumps; 3584 PetscBool isSeqSELL; 3585 PetscMPIInt size; 3586 
3587 PetscFunctionBegin; 3588 /* Create the factorization matrix */ 3589 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3590 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3591 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3592 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3593 PetscCall(MatSetUp(B)); 3594 3595 PetscCall(PetscNew(&mumps)); 3596 3597 B->ops->view = MatView_MUMPS; 3598 B->ops->getinfo = MatGetInfo_MUMPS; 3599 3600 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3601 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3602 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3603 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3604 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3605 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3606 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3607 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3608 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3609 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3610 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3611 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3612 3613 if (ftype == MAT_FACTOR_LU) { 3614 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3615 B->factortype = MAT_FACTOR_LU; 3616 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3617 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3618 mumps->sym = 0; 3619 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3620 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3621 3622 /* set solvertype */ 3623 PetscCall(PetscFree(B->solvertype)); 3624 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3625 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3626 if (size == 1) { 3627 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3628 B->canuseordering = PETSC_TRUE; 3629 } 3630 B->ops->destroy = MatDestroy_MUMPS; 3631 B->data = (void *)mumps; 3632 3633 *F = B; 3634 mumps->id.job = JOB_NULL; 3635 mumps->ICNTL_pre = NULL; 3636 mumps->CNTL_pre = NULL; 3637 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3638 PetscFunctionReturn(PETSC_SUCCESS); 3639 } 3640 3641 /* MatGetFactor for MATNEST matrices */ 3642 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3643 { 3644 Mat B, **mats; 3645 Mat_MUMPS *mumps; 3646 PetscInt nr, nc; 3647 PetscMPIInt size; 3648 PetscBool flg = PETSC_TRUE; 3649 3650 PetscFunctionBegin; 3651 #if defined(PETSC_USE_COMPLEX) 3652 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3653 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not 
/* MatGetFactor for MATNEST matrices */
static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B, **mats;
  Mat_MUMPS  *mumps;
  PetscInt    nr, nc;
  PetscMPIInt size;
  PetscBool   flg = PETSC_TRUE;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif

  /* Return with *F = NULL (no error) if the nest structure does not satisfy the requirements checked below */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag;

      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
      if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
      else {
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
      }
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag) {
            PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag) {
          PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag) {
        PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym               = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
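/*
  Illustration (a hedged sketch, not code from this file; A00, A01, A11, N, F are placeholder names):
  the Cholesky admissibility checks above accept, for example, a 2x2 nest whose row and column index
  sets coincide and do not overlap, which is what MatCreateNest() produces when NULL is passed for the
  index sets. With AIJ blocks of a symmetric system (the strictly lower block may be left NULL, since
  blocks with c < r are skipped for Cholesky):

    Mat blocks[4] = {A00, A01, NULL, A11};
    Mat N, F;
    PetscCall(MatCreateNest(PETSC_COMM_WORLD, 2, NULL, 2, NULL, blocks, &N));
    PetscCall(MatSetOption(N, MAT_SYMMETRIC, PETSC_TRUE));
    PetscCall(MatGetFactor(N, MATSOLVERMUMPS, MAT_FACTOR_CHOLESKY, &F));
*/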
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
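/*
  Usage sketch (illustrative only; A, F, b, x are placeholder names): once the registrations above are
  in place, a MUMPS LU factorization can be requested either directly through the factor API,

    Mat           F;
    IS            rperm, cperm;
    MatFactorInfo info;
    PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
    PetscCall(MatGetOrdering(A, MATORDERINGEXTERNAL, &rperm, &cperm));
    PetscCall(MatFactorInfoInitialize(&info));
    PetscCall(MatLUFactorSymbolic(F, A, rperm, cperm, &info));
    PetscCall(MatLUFactorNumeric(F, A, &info));
    PetscCall(MatSolve(F, b, x));

  or through the options database inside KSP/PC, e.g.
    -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps
*/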