1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <petscsf.h> 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 9 #include <../src/mat/impls/sell/mpi/mpisell.h> 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #if defined(PETSC_USE_REAL_SINGLE) 14 #include <cmumps_c.h> 15 #else 16 #include <zmumps_c.h> 17 #endif 18 #else 19 #if defined(PETSC_USE_REAL_SINGLE) 20 #include <smumps_c.h> 21 #else 22 #include <dmumps_c.h> 23 #endif 24 #endif 25 EXTERN_C_END 26 #define JOB_INIT -1 27 #define JOB_NULL 0 28 #define JOB_FACTSYMBOLIC 1 29 #define JOB_FACTNUMERIC 2 30 #define JOB_SOLVE 3 31 #define JOB_END -2 32 33 /* calls to MUMPS */ 34 #if defined(PETSC_USE_COMPLEX) 35 #if defined(PETSC_USE_REAL_SINGLE) 36 #define MUMPS_c cmumps_c 37 #else 38 #define MUMPS_c zmumps_c 39 #endif 40 #else 41 #if defined(PETSC_USE_REAL_SINGLE) 42 #define MUMPS_c smumps_c 43 #else 44 #define MUMPS_c dmumps_c 45 #endif 46 #endif 47 48 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 49 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 50 naming convention in PetscMPIInt, PetscBLASInt etc. 51 */ 52 typedef MUMPS_INT PetscMUMPSInt; 53 54 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 55 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 56 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 57 #endif 58 #else 59 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 60 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 61 #endif 62 #endif 63 64 #define MPIU_MUMPSINT MPI_INT 65 #define PETSC_MUMPS_INT_MAX 2147483647 66 #define PETSC_MUMPS_INT_MIN -2147483648 67 68 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 69 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 70 { 71 PetscFunctionBegin; 72 #if PetscDefined(USE_64BIT_INDICES) 73 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 74 #endif 75 *b = (PetscMUMPSInt)(a); 76 PetscFunctionReturn(PETSC_SUCCESS); 77 } 78 79 /* Put these utility routines here since they are only used in this file */ 80 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 81 { 82 PetscInt myval; 83 PetscBool myset; 84 PetscFunctionBegin; 85 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 86 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 87 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 88 if (set) *set = myset; 89 PetscFunctionReturn(PETSC_SUCCESS); 90 } 91 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 92 93 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. 
Before/after the call, we change/restore CPUs the master ranks can run on */ 94 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 95 #define PetscMUMPS_c(mumps) \ 96 do { \ 97 if (mumps->use_petsc_omp_support) { \ 98 if (mumps->is_omp_master) { \ 99 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 100 MUMPS_c(&mumps->id); \ 101 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 102 } \ 103 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 104 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 105 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 106 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 107 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 108 */ \ 109 PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 110 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \ 111 PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 112 } else { \ 113 MUMPS_c(&mumps->id); \ 114 } \ 115 } while (0) 116 #else 117 #define PetscMUMPS_c(mumps) \ 118 do { \ 119 MUMPS_c(&mumps->id); \ 120 } while (0) 121 #endif 122 123 /* declare MumpsScalar */ 124 #if defined(PETSC_USE_COMPLEX) 125 #if defined(PETSC_USE_REAL_SINGLE) 126 #define MumpsScalar mumps_complex 127 #else 128 #define MumpsScalar mumps_double_complex 129 #endif 130 #else 131 #define MumpsScalar PetscScalar 132 #endif 133 134 /* macros s.t. indices match MUMPS documentation */ 135 #define ICNTL(I) icntl[(I)-1] 136 #define CNTL(I) cntl[(I)-1] 137 #define INFOG(I) infog[(I)-1] 138 #define INFO(I) info[(I)-1] 139 #define RINFOG(I) rinfog[(I)-1] 140 #define RINFO(I) rinfo[(I)-1] 141 142 typedef struct Mat_MUMPS Mat_MUMPS; 143 struct Mat_MUMPS { 144 #if defined(PETSC_USE_COMPLEX) 145 #if defined(PETSC_USE_REAL_SINGLE) 146 CMUMPS_STRUC_C id; 147 #else 148 ZMUMPS_STRUC_C id; 149 #endif 150 #else 151 #if defined(PETSC_USE_REAL_SINGLE) 152 SMUMPS_STRUC_C id; 153 #else 154 DMUMPS_STRUC_C id; 155 #endif 156 #endif 157 158 MatStructure matstruc; 159 PetscMPIInt myid, petsc_size; 160 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 161 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 162 PetscInt64 nnz; /* number of nonzeros. 
The type is called selective 64-bit in mumps */ 163 PetscMUMPSInt sym; 164 MPI_Comm mumps_comm; 165 PetscMUMPSInt *ICNTL_pre; 166 PetscReal *CNTL_pre; 167 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 168 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 169 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 170 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 171 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 172 PetscInt *rhs_nrow, max_nrhs; 173 PetscMPIInt *rhs_recvcounts, *rhs_disps; 174 PetscScalar *rhs_loc, *rhs_recvbuf; 175 #endif 176 Vec b_seq, x_seq; 177 PetscInt ninfo, *info; /* which INFO to display */ 178 PetscInt sizeredrhs; 179 PetscScalar *schur_sol; 180 PetscInt schur_sizesol; 181 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 182 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 183 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 184 185 /* Support for MATNEST */ 186 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 187 PetscInt64 *nest_vals_start; 188 PetscScalar *nest_vals; 189 190 /* stuff used by petsc/mumps OpenMP support*/ 191 PetscBool use_petsc_omp_support; 192 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 193 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 194 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 195 PetscMPIInt tag, omp_comm_size; 196 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 197 MPI_Request *reqs; 198 }; 199 200 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 201 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 202 */ 203 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 204 { 205 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 206 207 PetscFunctionBegin; 208 #if defined(PETSC_USE_64BIT_INDICES) 209 { 210 PetscInt i; 211 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 212 PetscCall(PetscFree(mumps->ia_alloc)); 213 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 214 mumps->cur_ilen = nrow + 1; 215 } 216 if (nnz > mumps->cur_jlen) { 217 PetscCall(PetscFree(mumps->ja_alloc)); 218 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 219 mumps->cur_jlen = nnz; 220 } 221 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 222 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 223 *ia_mumps = mumps->ia_alloc; 224 *ja_mumps = mumps->ja_alloc; 225 } 226 #else 227 *ia_mumps = ia; 228 *ja_mumps = ja; 229 #endif 230 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 231 PetscFunctionReturn(PETSC_SUCCESS); 232 } 233 234 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 235 { 236 PetscFunctionBegin; 237 PetscCall(PetscFree(mumps->id.listvar_schur)); 238 PetscCall(PetscFree(mumps->id.redrhs)); 239 PetscCall(PetscFree(mumps->schur_sol)); 240 mumps->id.size_schur = 0; 241 mumps->id.schur_lld = 0; 242 mumps->id.ICNTL(19) = 0; 243 PetscFunctionReturn(PETSC_SUCCESS); 244 } 245 246 /* solve with rhs in mumps->id.redrhs and return in the same location */ 247 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 248 { 249 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 250 Mat S, B, X; 251 MatFactorSchurStatus schurstatus; 252 PetscInt sizesol; 253 254 PetscFunctionBegin; 255 PetscCall(MatFactorFactorizeSchurComplement(F)); 256 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 257 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 258 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 259 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 260 PetscCall(MatBindToCPU(B, S->boundtocpu)); 261 #endif 262 switch (schurstatus) { 263 case MAT_FACTOR_SCHUR_FACTORED: 264 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 265 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 266 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 267 PetscCall(MatBindToCPU(X, S->boundtocpu)); 268 #endif 269 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 270 PetscCall(MatMatSolveTranspose(S, B, X)); 271 } else { 272 PetscCall(MatMatSolve(S, B, X)); 273 } 274 break; 275 case MAT_FACTOR_SCHUR_INVERTED: 276 sizesol = mumps->id.nrhs * mumps->id.size_schur; 277 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 278 PetscCall(PetscFree(mumps->schur_sol)); 279 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 280 mumps->schur_sizesol = sizesol; 281 } 282 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 283 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 284 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 285 PetscCall(MatBindToCPU(X, S->boundtocpu)); 286 #endif 287 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 288 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 289 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 290 } else { 291 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 292 } 293 PetscCall(MatProductSetFromOptions(X)); 294 PetscCall(MatProductSymbolic(X)); 295 PetscCall(MatProductNumeric(X)); 296 297 
PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 298 break; 299 default: 300 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 301 } 302 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 303 PetscCall(MatDestroy(&B)); 304 PetscCall(MatDestroy(&X)); 305 PetscFunctionReturn(PETSC_SUCCESS); 306 } 307 308 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 309 { 310 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 311 312 PetscFunctionBegin; 313 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 314 PetscFunctionReturn(PETSC_SUCCESS); 315 } 316 if (!expansion) { /* prepare for the condensation step */ 317 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 318 /* allocate MUMPS internal array to store reduced right-hand sides */ 319 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 320 PetscCall(PetscFree(mumps->id.redrhs)); 321 mumps->id.lredrhs = mumps->id.size_schur; 322 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 323 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 324 } 325 mumps->id.ICNTL(26) = 1; /* condensation phase */ 326 } else { /* prepare for the expansion step */ 327 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 328 PetscCall(MatMumpsSolveSchur_Private(F)); 329 mumps->id.ICNTL(26) = 2; /* expansion phase */ 330 PetscMUMPS_c(mumps); 331 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 332 /* restore defaults */ 333 mumps->id.ICNTL(26) = -1; 334 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 335 if (mumps->id.nrhs > 1) { 336 PetscCall(PetscFree(mumps->id.redrhs)); 337 mumps->id.lredrhs = 0; 338 mumps->sizeredrhs = 0; 339 } 340 } 341 PetscFunctionReturn(PETSC_SUCCESS); 342 } 343 344 /* 345 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 346 347 input: 348 A - matrix in aij,baij or sbaij format 349 shift - 0: C style output triple; 1: Fortran style output triple. 350 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 351 MAT_REUSE_MATRIX: only the values in v array are updated 352 output: 353 nnz - dim of r, c, and v (number of local nonzero entries of A) 354 r, c, v - row and col index, matrix values (matrix triples) 355 356 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 357 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 358 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
359 360 */ 361 362 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 363 { 364 const PetscScalar *av; 365 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 366 PetscInt64 nz, rnz, i, j, k; 367 PetscMUMPSInt *row, *col; 368 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 369 370 PetscFunctionBegin; 371 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 372 mumps->val = (PetscScalar *)av; 373 if (reuse == MAT_INITIAL_MATRIX) { 374 nz = aa->nz; 375 ai = aa->i; 376 aj = aa->j; 377 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 378 for (i = k = 0; i < M; i++) { 379 rnz = ai[i + 1] - ai[i]; 380 ajj = aj + ai[i]; 381 for (j = 0; j < rnz; j++) { 382 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 383 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 384 k++; 385 } 386 } 387 mumps->irn = row; 388 mumps->jcn = col; 389 mumps->nnz = nz; 390 } 391 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 392 PetscFunctionReturn(PETSC_SUCCESS); 393 } 394 395 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 396 { 397 PetscInt64 nz, i, j, k, r; 398 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 399 PetscMUMPSInt *row, *col; 400 401 PetscFunctionBegin; 402 mumps->val = a->val; 403 if (reuse == MAT_INITIAL_MATRIX) { 404 nz = a->sliidx[a->totalslices]; 405 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 406 for (i = k = 0; i < a->totalslices; i++) { 407 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 408 } 409 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 410 mumps->irn = row; 411 mumps->jcn = col; 412 mumps->nnz = nz; 413 } 414 PetscFunctionReturn(PETSC_SUCCESS); 415 } 416 417 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 418 { 419 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 420 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 421 PetscInt64 M, nz, idx = 0, rnz, i, j, k, m; 422 PetscInt bs; 423 PetscMUMPSInt *row, *col; 424 425 PetscFunctionBegin; 426 PetscCall(MatGetBlockSize(A, &bs)); 427 M = A->rmap->N / bs; 428 mumps->val = aa->a; 429 if (reuse == MAT_INITIAL_MATRIX) { 430 ai = aa->i; 431 aj = aa->j; 432 nz = bs2 * aa->nz; 433 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 434 for (i = 0; i < M; i++) { 435 ajj = aj + ai[i]; 436 rnz = ai[i + 1] - ai[i]; 437 for (k = 0; k < rnz; k++) { 438 for (j = 0; j < bs; j++) { 439 for (m = 0; m < bs; m++) { 440 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 441 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 442 idx++; 443 } 444 } 445 } 446 } 447 mumps->irn = row; 448 mumps->jcn = col; 449 mumps->nnz = nz; 450 } 451 PetscFunctionReturn(PETSC_SUCCESS); 452 } 453 454 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 455 { 456 const PetscInt *ai, *aj, *ajj; 457 PetscInt bs; 458 PetscInt64 nz, rnz, i, j, k, m; 459 PetscMUMPSInt *row, *col; 460 PetscScalar *val; 461 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 462 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 463 #if defined(PETSC_USE_COMPLEX) 464 PetscBool isset, hermitian; 465 #endif 466 467 PetscFunctionBegin; 468 #if defined(PETSC_USE_COMPLEX) 469 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 470 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 471 #endif 472 ai 
= aa->i; 473 aj = aa->j; 474 PetscCall(MatGetBlockSize(A, &bs)); 475 if (reuse == MAT_INITIAL_MATRIX) { 476 const PetscInt64 alloc_size = aa->nz * bs2; 477 478 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 479 if (bs > 1) { 480 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 481 mumps->val = mumps->val_alloc; 482 } else { 483 mumps->val = aa->a; 484 } 485 mumps->irn = row; 486 mumps->jcn = col; 487 } else { 488 if (bs == 1) mumps->val = aa->a; 489 row = mumps->irn; 490 col = mumps->jcn; 491 } 492 val = mumps->val; 493 494 nz = 0; 495 if (bs > 1) { 496 for (i = 0; i < mbs; i++) { 497 rnz = ai[i + 1] - ai[i]; 498 ajj = aj + ai[i]; 499 for (j = 0; j < rnz; j++) { 500 for (k = 0; k < bs; k++) { 501 for (m = 0; m < bs; m++) { 502 if (ajj[j] > i || k >= m) { 503 if (reuse == MAT_INITIAL_MATRIX) { 504 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 505 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 506 } 507 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 508 } 509 } 510 } 511 } 512 } 513 } else if (reuse == MAT_INITIAL_MATRIX) { 514 for (i = 0; i < mbs; i++) { 515 rnz = ai[i + 1] - ai[i]; 516 ajj = aj + ai[i]; 517 for (j = 0; j < rnz; j++) { 518 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 519 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 520 nz++; 521 } 522 } 523 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 524 } 525 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 530 { 531 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 532 PetscInt64 nz, rnz, i, j; 533 const PetscScalar *av, *v1; 534 PetscScalar *val; 535 PetscMUMPSInt *row, *col; 536 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 537 PetscBool missing; 538 #if defined(PETSC_USE_COMPLEX) 539 PetscBool hermitian, isset; 540 #endif 541 542 PetscFunctionBegin; 543 #if defined(PETSC_USE_COMPLEX) 544 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 545 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 546 #endif 547 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 548 ai = aa->i; 549 aj = aa->j; 550 adiag = aa->diag; 551 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 552 if (reuse == MAT_INITIAL_MATRIX) { 553 /* count nz in the upper triangular part of A */ 554 nz = 0; 555 if (missing) { 556 for (i = 0; i < M; i++) { 557 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 558 for (j = ai[i]; j < ai[i + 1]; j++) { 559 if (aj[j] < i) continue; 560 nz++; 561 } 562 } else { 563 nz += ai[i + 1] - adiag[i]; 564 } 565 } 566 } else { 567 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 568 } 569 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 570 PetscCall(PetscMalloc1(nz, &val)); 571 mumps->nnz = nz; 572 mumps->irn = row; 573 mumps->jcn = col; 574 mumps->val = mumps->val_alloc = val; 575 576 nz = 0; 577 if (missing) { 578 for (i = 0; i < M; i++) { 579 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 580 for (j = ai[i]; j < ai[i + 1]; j++) { 581 if (aj[j] < i) continue; 582 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 583 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 584 val[nz] = av[j]; 585 nz++; 586 } 587 } else { 588 rnz = ai[i + 1] - adiag[i]; 589 ajj = aj + adiag[i]; 590 v1 = av + adiag[i]; 591 for (j = 0; j < rnz; 
j++) { 592 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 593 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 594 val[nz++] = v1[j]; 595 } 596 } 597 } 598 } else { 599 for (i = 0; i < M; i++) { 600 rnz = ai[i + 1] - adiag[i]; 601 ajj = aj + adiag[i]; 602 v1 = av + adiag[i]; 603 for (j = 0; j < rnz; j++) { 604 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 605 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 606 val[nz++] = v1[j]; 607 } 608 } 609 } 610 } else { 611 nz = 0; 612 val = mumps->val; 613 if (missing) { 614 for (i = 0; i < M; i++) { 615 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 616 for (j = ai[i]; j < ai[i + 1]; j++) { 617 if (aj[j] < i) continue; 618 val[nz++] = av[j]; 619 } 620 } else { 621 rnz = ai[i + 1] - adiag[i]; 622 v1 = av + adiag[i]; 623 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 624 } 625 } 626 } else { 627 for (i = 0; i < M; i++) { 628 rnz = ai[i + 1] - adiag[i]; 629 v1 = av + adiag[i]; 630 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 631 } 632 } 633 } 634 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 635 PetscFunctionReturn(PETSC_SUCCESS); 636 } 637 638 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 639 { 640 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 641 PetscInt bs; 642 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 643 PetscMUMPSInt *row, *col; 644 const PetscScalar *av, *bv, *v1, *v2; 645 PetscScalar *val; 646 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 647 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 648 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 649 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 650 #if defined(PETSC_USE_COMPLEX) 651 PetscBool hermitian, isset; 652 #endif 653 654 PetscFunctionBegin; 655 #if defined(PETSC_USE_COMPLEX) 656 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 657 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 658 #endif 659 PetscCall(MatGetBlockSize(A, &bs)); 660 rstart = A->rmap->rstart; 661 ai = aa->i; 662 aj = aa->j; 663 bi = bb->i; 664 bj = bb->j; 665 av = aa->a; 666 bv = bb->a; 667 668 garray = mat->garray; 669 670 if (reuse == MAT_INITIAL_MATRIX) { 671 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 672 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 673 PetscCall(PetscMalloc1(nz, &val)); 674 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 675 mumps->irn = row; 676 mumps->jcn = col; 677 mumps->val = mumps->val_alloc = val; 678 } else { 679 val = mumps->val; 680 } 681 682 jj = 0; 683 irow = rstart; 684 for (i = 0; i < mbs; i++) { 685 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 686 countA = ai[i + 1] - ai[i]; 687 countB = bi[i + 1] - bi[i]; 688 bjj = bj + bi[i]; 689 v1 = av + ai[i] * bs2; 690 v2 = bv + bi[i] * bs2; 691 692 if (bs > 1) { 693 /* A-part */ 694 for (j = 0; j < countA; j++) { 695 for (k = 0; k < bs; k++) { 696 for (m = 0; m < bs; m++) { 697 if (rstart + ajj[j] * bs > irow || k >= m) { 698 if (reuse == MAT_INITIAL_MATRIX) { 699 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 700 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 701 } 702 val[jj++] = v1[j * bs2 + m + k * bs]; 703 } 704 } 705 } 706 } 707 708 /* B-part */ 709 for (j = 0; j < countB; j++) { 710 for (k = 0; k < bs; k++) { 711 for (m = 0; m < bs; m++) { 712 if (reuse == MAT_INITIAL_MATRIX) { 713 PetscCall(PetscMUMPSIntCast(irow + m + shift, 
&row[jj])); 714 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 715 } 716 val[jj++] = v2[j * bs2 + m + k * bs]; 717 } 718 } 719 } 720 } else { 721 /* A-part */ 722 for (j = 0; j < countA; j++) { 723 if (reuse == MAT_INITIAL_MATRIX) { 724 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 725 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 726 } 727 val[jj++] = v1[j]; 728 } 729 730 /* B-part */ 731 for (j = 0; j < countB; j++) { 732 if (reuse == MAT_INITIAL_MATRIX) { 733 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 734 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 735 } 736 val[jj++] = v2[j]; 737 } 738 } 739 irow += bs; 740 } 741 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 742 PetscFunctionReturn(PETSC_SUCCESS); 743 } 744 745 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 746 { 747 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 748 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 749 PetscMUMPSInt *row, *col; 750 const PetscScalar *av, *bv, *v1, *v2; 751 PetscScalar *val; 752 Mat Ad, Ao; 753 Mat_SeqAIJ *aa; 754 Mat_SeqAIJ *bb; 755 756 PetscFunctionBegin; 757 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 758 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 759 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 760 761 aa = (Mat_SeqAIJ *)(Ad)->data; 762 bb = (Mat_SeqAIJ *)(Ao)->data; 763 ai = aa->i; 764 aj = aa->j; 765 bi = bb->i; 766 bj = bb->j; 767 768 rstart = A->rmap->rstart; 769 cstart = A->cmap->rstart; 770 771 if (reuse == MAT_INITIAL_MATRIX) { 772 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 773 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 774 PetscCall(PetscMalloc1(nz, &val)); 775 mumps->nnz = nz; 776 mumps->irn = row; 777 mumps->jcn = col; 778 mumps->val = mumps->val_alloc = val; 779 } else { 780 val = mumps->val; 781 } 782 783 jj = 0; 784 irow = rstart; 785 for (i = 0; i < m; i++) { 786 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 787 countA = ai[i + 1] - ai[i]; 788 countB = bi[i + 1] - bi[i]; 789 bjj = bj + bi[i]; 790 v1 = av + ai[i]; 791 v2 = bv + bi[i]; 792 793 /* A-part */ 794 for (j = 0; j < countA; j++) { 795 if (reuse == MAT_INITIAL_MATRIX) { 796 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 797 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 798 } 799 val[jj++] = v1[j]; 800 } 801 802 /* B-part */ 803 for (j = 0; j < countB; j++) { 804 if (reuse == MAT_INITIAL_MATRIX) { 805 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 806 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 807 } 808 val[jj++] = v2[j]; 809 } 810 irow++; 811 } 812 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 813 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 814 PetscFunctionReturn(PETSC_SUCCESS); 815 } 816 817 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 818 { 819 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 820 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 821 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 822 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 823 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 824 const PetscInt bs2 = mat->bs2; 825 PetscInt bs; 826 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 827 PetscMUMPSInt *row, *col; 828 const PetscScalar *av = aa->a, *bv = bb->a, 
*v1, *v2; 829 PetscScalar *val; 830 831 PetscFunctionBegin; 832 PetscCall(MatGetBlockSize(A, &bs)); 833 if (reuse == MAT_INITIAL_MATRIX) { 834 nz = bs2 * (aa->nz + bb->nz); 835 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 836 PetscCall(PetscMalloc1(nz, &val)); 837 mumps->nnz = nz; 838 mumps->irn = row; 839 mumps->jcn = col; 840 mumps->val = mumps->val_alloc = val; 841 } else { 842 val = mumps->val; 843 } 844 845 jj = 0; 846 irow = rstart; 847 for (i = 0; i < mbs; i++) { 848 countA = ai[i + 1] - ai[i]; 849 countB = bi[i + 1] - bi[i]; 850 ajj = aj + ai[i]; 851 bjj = bj + bi[i]; 852 v1 = av + bs2 * ai[i]; 853 v2 = bv + bs2 * bi[i]; 854 855 idx = 0; 856 /* A-part */ 857 for (k = 0; k < countA; k++) { 858 for (j = 0; j < bs; j++) { 859 for (n = 0; n < bs; n++) { 860 if (reuse == MAT_INITIAL_MATRIX) { 861 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 862 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 863 } 864 val[jj++] = v1[idx++]; 865 } 866 } 867 } 868 869 idx = 0; 870 /* B-part */ 871 for (k = 0; k < countB; k++) { 872 for (j = 0; j < bs; j++) { 873 for (n = 0; n < bs; n++) { 874 if (reuse == MAT_INITIAL_MATRIX) { 875 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 876 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 877 } 878 val[jj++] = v2[idx++]; 879 } 880 } 881 } 882 irow += bs; 883 } 884 PetscFunctionReturn(PETSC_SUCCESS); 885 } 886 887 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 888 { 889 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 890 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 891 PetscMUMPSInt *row, *col; 892 const PetscScalar *av, *bv, *v1, *v2; 893 PetscScalar *val; 894 Mat Ad, Ao; 895 Mat_SeqAIJ *aa; 896 Mat_SeqAIJ *bb; 897 #if defined(PETSC_USE_COMPLEX) 898 PetscBool hermitian, isset; 899 #endif 900 901 PetscFunctionBegin; 902 #if defined(PETSC_USE_COMPLEX) 903 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 904 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 905 #endif 906 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 907 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 908 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 909 910 aa = (Mat_SeqAIJ *)(Ad)->data; 911 bb = (Mat_SeqAIJ *)(Ao)->data; 912 ai = aa->i; 913 aj = aa->j; 914 adiag = aa->diag; 915 bi = bb->i; 916 bj = bb->j; 917 918 rstart = A->rmap->rstart; 919 920 if (reuse == MAT_INITIAL_MATRIX) { 921 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 922 nzb = 0; /* num of upper triangular entries in mat->B */ 923 for (i = 0; i < m; i++) { 924 nza += (ai[i + 1] - adiag[i]); 925 countB = bi[i + 1] - bi[i]; 926 bjj = bj + bi[i]; 927 for (j = 0; j < countB; j++) { 928 if (garray[bjj[j]] > rstart) nzb++; 929 } 930 } 931 932 nz = nza + nzb; /* total nz of upper triangular part of mat */ 933 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 934 PetscCall(PetscMalloc1(nz, &val)); 935 mumps->nnz = nz; 936 mumps->irn = row; 937 mumps->jcn = col; 938 mumps->val = mumps->val_alloc = val; 939 } else { 940 val = mumps->val; 941 } 942 943 jj = 0; 944 irow = rstart; 945 for (i = 0; i < m; i++) { 946 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 947 v1 = av + adiag[i]; 948 countA = ai[i + 1] - adiag[i]; 949 countB = bi[i + 1] - bi[i]; 950 bjj = bj + bi[i]; 951 v2 = bv + bi[i]; 952 953 /* 
A-part */ 954 for (j = 0; j < countA; j++) { 955 if (reuse == MAT_INITIAL_MATRIX) { 956 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 957 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 958 } 959 val[jj++] = v1[j]; 960 } 961 962 /* B-part */ 963 for (j = 0; j < countB; j++) { 964 if (garray[bjj[j]] > rstart) { 965 if (reuse == MAT_INITIAL_MATRIX) { 966 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 967 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 968 } 969 val[jj++] = v2[j]; 970 } 971 } 972 irow++; 973 } 974 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 975 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 976 PetscFunctionReturn(PETSC_SUCCESS); 977 } 978 979 PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 980 { 981 Mat **mats; 982 PetscInt nr, nc; 983 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 984 985 PetscFunctionBegin; 986 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 987 if (reuse == MAT_INITIAL_MATRIX) { 988 PetscMUMPSInt *irns, *jcns; 989 PetscScalar *vals; 990 PetscInt64 totnnz, cumnnz, maxnnz; 991 PetscInt *pjcns_w; 992 IS *rows, *cols; 993 PetscInt **rows_idx, **cols_idx; 994 995 cumnnz = 0; 996 maxnnz = 0; 997 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 998 for (PetscInt r = 0; r < nr; r++) { 999 for (PetscInt c = 0; c < nc; c++) { 1000 Mat sub = mats[r][c]; 1001 1002 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1003 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1004 if (sub) { 1005 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1006 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE; 1007 MatInfo info; 1008 1009 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1010 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1011 else { 1012 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1013 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1014 } 1015 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1016 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1017 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1018 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1019 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1020 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1021 1022 if (chol) { 1023 if (r == c) { 1024 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1025 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1026 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1027 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1028 } else { 1029 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1030 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1031 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1032 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1033 } 1034 } else { 1035 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1036 else if (isMPIAIJ) convert_to_triples = 
MatConvertToTriples_mpiaij_mpiaij; 1037 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1038 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1039 } 1040 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1041 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1042 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1043 cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 1044 maxnnz = PetscMax(maxnnz, info.nz_used); 1045 } 1046 } 1047 } 1048 1049 /* Allocate total COO */ 1050 totnnz = cumnnz; 1051 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1052 PetscCall(PetscMalloc1(totnnz, &vals)); 1053 1054 /* Handle rows and column maps 1055 We directly map rows and use an SF for the columns */ 1056 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1057 PetscCall(MatNestGetISs(A, rows, cols)); 1058 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1059 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1060 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1061 else (void)maxnnz; 1062 1063 cumnnz = 0; 1064 for (PetscInt r = 0; r < nr; r++) { 1065 for (PetscInt c = 0; c < nc; c++) { 1066 Mat sub = mats[r][c]; 1067 const PetscInt *ridx = rows_idx[r]; 1068 const PetscInt *cidx = cols_idx[c]; 1069 PetscInt rst; 1070 PetscSF csf; 1071 PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 1072 PetscLayout cmap; 1073 1074 mumps->nest_vals_start[r * nc + c] = cumnnz; 1075 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1076 1077 /* Extract inner blocks if needed */ 1078 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1079 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1080 else { 1081 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1082 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1083 } 1084 swap = (PetscBool)(isTrans || isHTrans); 1085 1086 /* Get column layout to map off-process columns */ 1087 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1088 1089 /* Get row start to map on-process rows */ 1090 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1091 1092 /* Directly use the mumps datastructure and use C ordering for now */ 1093 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1094 1095 /* Swap the role of rows and columns indices for transposed blocks 1096 since we need values with global final ordering */ 1097 if (swap) { 1098 cidx = rows_idx[r]; 1099 ridx = cols_idx[c]; 1100 } 1101 1102 /* Communicate column indices 1103 This could have been done with a single SF but it would have complicated the code a lot. 
1104 But since we do it only once, we pay the price of setting up an SF for each block */ 1105 if (PetscDefined(USE_64BIT_INDICES)) { 1106 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1107 } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 1108 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1109 PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1110 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1111 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1112 PetscCall(PetscSFDestroy(&csf)); 1113 1114 /* Import indices: use direct map for rows and mapped indices for columns */ 1115 if (swap) { 1116 for (PetscInt k = 0; k < mumps->nnz; k++) { 1117 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1118 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1119 } 1120 } else { 1121 for (PetscInt k = 0; k < mumps->nnz; k++) { 1122 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1123 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1124 } 1125 } 1126 1127 /* Import values to full COO */ 1128 if (isHTrans) { /* conjugate the entries */ 1129 for (PetscInt k = 0; k < mumps->nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1130 } 1131 PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1132 1133 /* Shift new starting point and sanity check */ 1134 cumnnz += mumps->nnz; 1135 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1136 1137 /* Free scratch memory */ 1138 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1139 PetscCall(PetscFree(mumps->val_alloc)); 1140 mumps->val = NULL; 1141 mumps->nnz = 0; 1142 } 1143 } 1144 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1145 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1146 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1147 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1148 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1149 mumps->nest_vals_start[nr * nc] = cumnnz; 1150 1151 /* Set pointers for final MUMPS data structure */ 1152 mumps->nest_vals = vals; 1153 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1154 mumps->val = vals; 1155 mumps->irn = irns; 1156 mumps->jcn = jcns; 1157 mumps->nnz = cumnnz; 1158 } else { 1159 PetscScalar *oval = mumps->nest_vals; 1160 for (PetscInt r = 0; r < nr; r++) { 1161 for (PetscInt c = 0; c < nc; c++) { 1162 PetscBool isTrans, isHTrans = PETSC_FALSE; 1163 Mat sub = mats[r][c]; 1164 PetscInt midx = r * nc + c; 1165 1166 if (!mumps->nest_convert_to_triples[midx]) continue; 1167 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1168 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1169 else { 1170 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1171 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1172 } 1173 mumps->val = oval + mumps->nest_vals_start[midx]; 1174 
PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1175 if (isHTrans) { 1176 PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1177 for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1178 } 1179 } 1180 } 1181 mumps->val = oval; 1182 } 1183 PetscFunctionReturn(PETSC_SUCCESS); 1184 } 1185 1186 PetscErrorCode MatDestroy_MUMPS(Mat A) 1187 { 1188 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1189 1190 PetscFunctionBegin; 1191 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1192 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1193 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1194 PetscCall(VecDestroy(&mumps->b_seq)); 1195 PetscCall(VecDestroy(&mumps->x_seq)); 1196 PetscCall(PetscFree(mumps->id.perm_in)); 1197 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1198 PetscCall(PetscFree(mumps->val_alloc)); 1199 PetscCall(PetscFree(mumps->info)); 1200 PetscCall(PetscFree(mumps->ICNTL_pre)); 1201 PetscCall(PetscFree(mumps->CNTL_pre)); 1202 PetscCall(MatMumpsResetSchur_Private(mumps)); 1203 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1204 mumps->id.job = JOB_END; 1205 PetscMUMPS_c(mumps); 1206 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1207 if (mumps->mumps_comm != MPI_COMM_NULL) { 1208 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1209 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1210 } 1211 } 1212 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1213 if (mumps->use_petsc_omp_support) { 1214 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1215 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1216 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1217 } 1218 #endif 1219 PetscCall(PetscFree(mumps->ia_alloc)); 1220 PetscCall(PetscFree(mumps->ja_alloc)); 1221 PetscCall(PetscFree(mumps->recvcount)); 1222 PetscCall(PetscFree(mumps->reqs)); 1223 PetscCall(PetscFree(mumps->irhs_loc)); 1224 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1225 PetscCall(PetscFree(mumps->nest_vals)); 1226 PetscCall(PetscFree(A->data)); 1227 1228 /* clear composed functions */ 1229 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1230 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1231 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1232 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1233 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1234 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1235 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1236 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1237 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1238 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1239 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1240 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1241 
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1242 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1243 PetscFunctionReturn(PETSC_SUCCESS); 1244 } 1245 1246 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1247 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1248 { 1249 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1250 const PetscMPIInt ompsize = mumps->omp_comm_size; 1251 PetscInt i, m, M, rstart; 1252 1253 PetscFunctionBegin; 1254 PetscCall(MatGetSize(A, &M, NULL)); 1255 PetscCall(MatGetLocalSize(A, &m, NULL)); 1256 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1257 if (ompsize == 1) { 1258 if (!mumps->irhs_loc) { 1259 mumps->nloc_rhs = m; 1260 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1261 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1262 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1263 } 1264 mumps->id.rhs_loc = (MumpsScalar *)array; 1265 } else { 1266 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1267 const PetscInt *ranges; 1268 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1269 MPI_Group petsc_group, omp_group; 1270 PetscScalar *recvbuf = NULL; 1271 1272 if (mumps->is_omp_master) { 1273 /* Lazily initialize the omp stuff for distributed rhs */ 1274 if (!mumps->irhs_loc) { 1275 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1276 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1277 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1278 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1279 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1280 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1281 1282 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1283 mumps->nloc_rhs = 0; 1284 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1285 for (j = 0; j < ompsize; j++) { 1286 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1287 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1288 } 1289 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1290 for (j = k = 0; j < ompsize; j++) { 1291 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1292 } 1293 1294 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1295 PetscCallMPI(MPI_Group_free(&petsc_group)); 1296 PetscCallMPI(MPI_Group_free(&omp_group)); 1297 } 1298 1299 /* Realloc buffers when current nrhs is bigger than what we have met */ 1300 if (nrhs > mumps->max_nrhs) { 1301 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1302 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1303 mumps->max_nrhs = nrhs; 1304 } 1305 1306 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1307 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1308 mumps->rhs_disps[0] = 0; 1309 for (j = 1; j < ompsize; j++) { 1310 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1311 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1312 } 1313 recvbuf = 
(nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1314 } 1315 1316 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1317 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1318 1319 if (mumps->is_omp_master) { 1320 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1321 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1322 for (j = 0; j < ompsize; j++) { 1323 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1324 dst = dstbase; 1325 for (i = 0; i < nrhs; i++) { 1326 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1327 src += mumps->rhs_nrow[j]; 1328 dst += mumps->nloc_rhs; 1329 } 1330 dstbase += mumps->rhs_nrow[j]; 1331 } 1332 } 1333 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1334 } 1335 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1336 } 1337 mumps->id.nrhs = nrhs; 1338 mumps->id.nloc_rhs = mumps->nloc_rhs; 1339 mumps->id.lrhs_loc = mumps->nloc_rhs; 1340 mumps->id.irhs_loc = mumps->irhs_loc; 1341 PetscFunctionReturn(PETSC_SUCCESS); 1342 } 1343 1344 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1345 { 1346 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1347 const PetscScalar *rarray = NULL; 1348 PetscScalar *array; 1349 IS is_iden, is_petsc; 1350 PetscInt i; 1351 PetscBool second_solve = PETSC_FALSE; 1352 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1353 1354 PetscFunctionBegin; 1355 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1356 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1357 &cite1)); 1358 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1359 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1360 &cite2)); 1361 1362 if (A->factorerrortype) { 1363 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1364 PetscCall(VecSetInf(x)); 1365 PetscFunctionReturn(PETSC_SUCCESS); 1366 } 1367 1368 mumps->id.nrhs = 1; 1369 if (mumps->petsc_size > 1) { 1370 if (mumps->ICNTL20 == 10) { 1371 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1372 PetscCall(VecGetArrayRead(b, &rarray)); 1373 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1374 } else { 1375 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1376 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1377 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1378 if (!mumps->myid) { 1379 PetscCall(VecGetArray(mumps->b_seq, &array)); 1380 mumps->id.rhs = (MumpsScalar *)array; 1381 } 1382 } 1383 } else { /* petsc_size == 1 */ 1384 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1385 PetscCall(VecCopy(b, x)); 1386 PetscCall(VecGetArray(x, &array)); 1387 mumps->id.rhs = (MumpsScalar *)array; 1388 } 1389 1390 /* 1391 handle condensation step of Schur complement (if any) 1392 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1393 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1394 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1395 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1396 */ 1397 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1398 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1399 second_solve = PETSC_TRUE; 1400 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1401 } 1402 /* solve phase */ 1403 mumps->id.job = JOB_SOLVE; 1404 PetscMUMPS_c(mumps); 1405 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1406 1407 /* handle expansion step of Schur complement (if any) */ 1408 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1409 1410 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1411 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1412 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1413 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1414 } 1415 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1416 PetscInt *isol2_loc = NULL; 1417 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1418 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1419 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1420 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1421 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1422 PetscCall(ISDestroy(&is_iden)); 1423 
PetscCall(ISDestroy(&is_petsc)); 1424 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1425 } 1426 1427 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1428 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1429 } 1430 1431 if (mumps->petsc_size > 1) { 1432 if (mumps->ICNTL20 == 10) { 1433 PetscCall(VecRestoreArrayRead(b, &rarray)); 1434 } else if (!mumps->myid) { 1435 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1436 } 1437 } else PetscCall(VecRestoreArray(x, &array)); 1438 1439 PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3))); 1440 PetscFunctionReturn(PETSC_SUCCESS); 1441 } 1442 1443 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1444 { 1445 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1446 1447 PetscFunctionBegin; 1448 mumps->id.ICNTL(9) = 0; 1449 PetscCall(MatSolve_MUMPS(A, b, x)); 1450 mumps->id.ICNTL(9) = 1; 1451 PetscFunctionReturn(PETSC_SUCCESS); 1452 } 1453 1454 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1455 { 1456 Mat Bt = NULL; 1457 PetscBool denseX, denseB, flg, flgT; 1458 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1459 PetscInt i, nrhs, M; 1460 PetscScalar *array; 1461 const PetscScalar *rbray; 1462 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1463 PetscMUMPSInt *isol_loc, *isol_loc_save; 1464 PetscScalar *bray, *sol_loc, *sol_loc_save; 1465 IS is_to, is_from; 1466 PetscInt k, proc, j, m, myrstart; 1467 const PetscInt *rstart; 1468 Vec v_mpi, msol_loc; 1469 VecScatter scat_sol; 1470 Vec b_seq; 1471 VecScatter scat_rhs; 1472 PetscScalar *aa; 1473 PetscInt spnr, *ia, *ja; 1474 Mat_MPIAIJ *b = NULL; 1475 1476 PetscFunctionBegin; 1477 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1478 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1479 1480 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1481 if (denseB) { 1482 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1483 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1484 } else { /* sparse B */ 1485 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1486 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1487 if (flgT) { /* input B is transpose of actual RHS matrix, 1488 because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 1489 PetscCall(MatTransposeGetMat(B, &Bt)); 1490 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1491 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1492 } 1493 1494 PetscCall(MatGetSize(B, &M, &nrhs)); 1495 mumps->id.nrhs = nrhs; 1496 mumps->id.lrhs = M; 1497 mumps->id.rhs = NULL; 1498 1499 if (mumps->petsc_size == 1) { 1500 PetscScalar *aa; 1501 PetscInt spnr, *ia, *ja; 1502 PetscBool second_solve = PETSC_FALSE; 1503 1504 PetscCall(MatDenseGetArray(X, &array)); 1505 mumps->id.rhs = (MumpsScalar *)array; 1506 1507 if (denseB) { 1508 /* copy B to X */ 1509 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1510 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1511 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1512 } else { /* sparse B */ 1513 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1514 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1515 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1516 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1517 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1518 } 1519 /* handle condensation step of Schur complement (if any) */ 1520 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1521 second_solve = PETSC_TRUE; 1522 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1523 } 1524 /* solve phase */ 1525 mumps->id.job = JOB_SOLVE; 1526 PetscMUMPS_c(mumps); 1527 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1528 1529 /* handle expansion step of Schur complement (if any) */ 1530 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1531 if (!denseB) { /* sparse B */ 1532 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1533 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1534 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1535 } 1536 PetscCall(MatDenseRestoreArray(X, &array)); 1537 PetscFunctionReturn(PETSC_SUCCESS); 1538 } 1539 1540 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
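With a dense B and MUMPS 5.3.0 or later, the distributed-RHS path below (ICNTL(20)=10) avoids this
centralization; the older centralized path gathers B onto rank 0 first.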
*/ 1541 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1542 1543 /* create msol_loc to hold mumps local solution */ 1544 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1545 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1546 1547 lsol_loc = mumps->id.lsol_loc; 1548 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1549 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1550 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1551 mumps->id.isol_loc = isol_loc; 1552 1553 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1554 1555 if (denseB) { 1556 if (mumps->ICNTL20 == 10) { 1557 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1558 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1559 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1560 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1561 PetscCall(MatGetLocalSize(B, &m, NULL)); 1562 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1563 } else { 1564 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1565 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1566 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1567 0, re-arrange B into desired order, which is a local operation. 1568 */ 1569 1570 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1571 /* wrap dense rhs matrix B into a vector v_mpi */ 1572 PetscCall(MatGetLocalSize(B, &m, NULL)); 1573 PetscCall(MatDenseGetArray(B, &bray)); 1574 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1575 PetscCall(MatDenseRestoreArray(B, &bray)); 1576 1577 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
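The loop below builds idx so that entry k of v_mpi, which holds B(i,j), is scattered to position
j*M+i of the centralized column-major rhs b_seq on rank 0. For illustration, with 2 ranks owning
2 rows each (M=4) and nrhs=2, idx = {0,1,4,5,2,3,6,7}.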
*/ 1578 if (!mumps->myid) { 1579 PetscInt *idx; 1580 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1581 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1582 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1583 k = 0; 1584 for (proc = 0; proc < mumps->petsc_size; proc++) { 1585 for (j = 0; j < nrhs; j++) { 1586 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1587 } 1588 } 1589 1590 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1591 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1592 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1593 } else { 1594 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1595 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1596 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1597 } 1598 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1599 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1600 PetscCall(ISDestroy(&is_to)); 1601 PetscCall(ISDestroy(&is_from)); 1602 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1603 1604 if (!mumps->myid) { /* define rhs on the host */ 1605 PetscCall(VecGetArray(b_seq, &bray)); 1606 mumps->id.rhs = (MumpsScalar *)bray; 1607 PetscCall(VecRestoreArray(b_seq, &bray)); 1608 } 1609 } 1610 } else { /* sparse B */ 1611 b = (Mat_MPIAIJ *)Bt->data; 1612 1613 /* wrap dense X into a vector v_mpi */ 1614 PetscCall(MatGetLocalSize(X, &m, NULL)); 1615 PetscCall(MatDenseGetArray(X, &bray)); 1616 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1617 PetscCall(MatDenseRestoreArray(X, &bray)); 1618 1619 if (!mumps->myid) { 1620 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1621 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1622 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1623 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1624 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1625 } else { 1626 mumps->id.irhs_ptr = NULL; 1627 mumps->id.irhs_sparse = NULL; 1628 mumps->id.nz_rhs = 0; 1629 mumps->id.rhs_sparse = NULL; 1630 } 1631 } 1632 1633 /* solve phase */ 1634 mumps->id.job = JOB_SOLVE; 1635 PetscMUMPS_c(mumps); 1636 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1637 1638 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1639 PetscCall(MatDenseGetArray(X, &array)); 1640 PetscCall(VecPlaceArray(v_mpi, array)); 1641 1642 /* create scatter scat_sol */ 1643 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1644 /* iidx: index for scatter mumps solution to petsc X */ 1645 1646 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1647 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1648 for (i = 0; i < lsol_loc; i++) { 1649 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1650 1651 for (proc = 0; proc < mumps->petsc_size; proc++) { 1652 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1653 myrstart = rstart[proc]; 1654 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1655 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1656 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1657 break; 1658 } 1659 } 1660 1661 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1662 } 1663 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1664 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1665 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1666 PetscCall(ISDestroy(&is_from)); 1667 PetscCall(ISDestroy(&is_to)); 1668 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1669 PetscCall(MatDenseRestoreArray(X, &array)); 1670 1671 /* free spaces */ 1672 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1673 mumps->id.isol_loc = isol_loc_save; 1674 1675 PetscCall(PetscFree2(sol_loc, isol_loc)); 1676 PetscCall(PetscFree(idxx)); 1677 PetscCall(VecDestroy(&msol_loc)); 1678 PetscCall(VecDestroy(&v_mpi)); 1679 if (!denseB) { 1680 if (!mumps->myid) { 1681 b = (Mat_MPIAIJ *)Bt->data; 1682 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1683 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1684 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1685 } 1686 } else { 1687 if (mumps->ICNTL20 == 0) { 1688 PetscCall(VecDestroy(&b_seq)); 1689 PetscCall(VecScatterDestroy(&scat_rhs)); 1690 } 1691 } 1692 PetscCall(VecScatterDestroy(&scat_sol)); 1693 PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3))); 1694 PetscFunctionReturn(PETSC_SUCCESS); 1695 } 1696 1697 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1698 { 1699 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1700 PetscMUMPSInt oldvalue = mumps->id.ICNTL(9); 1701 1702 PetscFunctionBegin; 1703 mumps->id.ICNTL(9) = 0; 1704 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1705 mumps->id.ICNTL(9) = oldvalue; 1706 PetscFunctionReturn(PETSC_SUCCESS); 1707 } 1708 1709 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1710 { 1711 PetscBool flg; 1712 Mat B; 1713 1714 PetscFunctionBegin; 1715 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1716 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1717 1718 /* Create B=Bt^T that uses Bt's data structure */ 1719 PetscCall(MatCreateTranspose(Bt, &B)); 1720 1721 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1722 PetscCall(MatDestroy(&B)); 1723 PetscFunctionReturn(PETSC_SUCCESS); 1724 } 1725 1726 #if !defined(PETSC_USE_COMPLEX) 1727 /* 1728 input: 1729 F: numeric factor 1730 output: 1731 nneg: total number of negative pivots 1732 nzero: total number of zero pivots 1733 npos: (global dimension of F) - nneg - nzero 1734 */ 1735 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1736 { 1737 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1738 PetscMPIInt size; 1739 1740 PetscFunctionBegin; 1741 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1742 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility 
to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1743 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1744 1745 if (nneg) *nneg = mumps->id.INFOG(12); 1746 if (nzero || npos) { 1747 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1748 if (nzero) *nzero = mumps->id.INFOG(28); 1749 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1750 } 1751 PetscFunctionReturn(PETSC_SUCCESS); 1752 } 1753 #endif 1754 1755 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1756 { 1757 PetscInt i, nreqs; 1758 PetscMUMPSInt *irn, *jcn; 1759 PetscMPIInt count; 1760 PetscInt64 totnnz, remain; 1761 const PetscInt osize = mumps->omp_comm_size; 1762 PetscScalar *val; 1763 1764 PetscFunctionBegin; 1765 if (osize > 1) { 1766 if (reuse == MAT_INITIAL_MATRIX) { 1767 /* master first gathers counts of nonzeros to receive */ 1768 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1769 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1770 1771 /* Then each computes number of send/recvs */ 1772 if (mumps->is_omp_master) { 1773 /* Start from 1 since self communication is not done in MPI */ 1774 nreqs = 0; 1775 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1776 } else { 1777 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1778 } 1779 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1780 1781 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1782 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1783 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1784 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
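For illustration: a hypothetical rank contributing nnz = 5,000,000,000 entries would send them in
three slices of 2,147,483,647, 2,147,483,647 and 705,032,706 entries, each within the 2^31-1 limit
on MPI counts.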
1785 */ 1786 nreqs = 0; /* counter for actual send/recvs */ 1787 if (mumps->is_omp_master) { 1788 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1789 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1790 PetscCall(PetscMalloc1(totnnz, &val)); 1791 1792 /* Self communication */ 1793 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1794 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1795 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1796 1797 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1798 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1799 PetscCall(PetscFree(mumps->val_alloc)); 1800 mumps->nnz = totnnz; 1801 mumps->irn = irn; 1802 mumps->jcn = jcn; 1803 mumps->val = mumps->val_alloc = val; 1804 1805 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1806 jcn += mumps->recvcount[0]; 1807 val += mumps->recvcount[0]; 1808 1809 /* Remote communication */ 1810 for (i = 1; i < osize; i++) { 1811 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1812 remain = mumps->recvcount[i] - count; 1813 while (count > 0) { 1814 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1815 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1816 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1817 irn += count; 1818 jcn += count; 1819 val += count; 1820 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1821 remain -= count; 1822 } 1823 } 1824 } else { 1825 irn = mumps->irn; 1826 jcn = mumps->jcn; 1827 val = mumps->val; 1828 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1829 remain = mumps->nnz - count; 1830 while (count > 0) { 1831 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1832 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1833 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1834 irn += count; 1835 jcn += count; 1836 val += count; 1837 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1838 remain -= count; 1839 } 1840 } 1841 } else { 1842 nreqs = 0; 1843 if (mumps->is_omp_master) { 1844 val = mumps->val + mumps->recvcount[0]; 1845 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1846 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1847 remain = mumps->recvcount[i] - count; 1848 while (count > 0) { 1849 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1850 val += count; 1851 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1852 remain -= count; 1853 } 1854 } 1855 } else { 1856 val = mumps->val; 1857 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1858 remain = mumps->nnz - count; 1859 while (count > 0) { 1860 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1861 val += count; 1862 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1863 remain -= count; 1864 } 1865 } 1866 } 1867 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1868 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1869 } 1870 PetscFunctionReturn(PETSC_SUCCESS); 1871 } 1872 1873 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const 
MatFactorInfo *info) 1874 { 1875 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1876 PetscBool isMPIAIJ; 1877 1878 PetscFunctionBegin; 1879 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1880 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1881 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1882 PetscFunctionReturn(PETSC_SUCCESS); 1883 } 1884 1885 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1886 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1887 1888 /* numerical factorization phase */ 1889 mumps->id.job = JOB_FACTNUMERIC; 1890 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1891 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 1892 } else { 1893 mumps->id.a_loc = (MumpsScalar *)mumps->val; 1894 } 1895 PetscMUMPS_c(mumps); 1896 if (mumps->id.INFOG(1) < 0) { 1897 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 1898 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1899 PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1900 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1901 } else if (mumps->id.INFOG(1) == -13) { 1902 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1903 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1904 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1905 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1906 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1907 } else { 1908 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1909 F->factorerrortype = MAT_FACTOR_OTHER; 1910 } 1911 } 1912 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " mumps->id.ICNTL(16):=%d", mumps->id.INFOG(16)); 1913 1914 F->assembled = PETSC_TRUE; 1915 1916 if (F->schur) { /* reset Schur status to unfactored */ 1917 #if defined(PETSC_HAVE_CUDA) 1918 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1919 #endif 1920 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1921 mumps->id.ICNTL(19) = 2; 1922 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1923 } 1924 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1925 } 1926 1927 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1928 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1929 1930 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1931 if (mumps->petsc_size > 1) { 1932 PetscInt lsol_loc; 1933 PetscScalar *sol_loc; 1934 1935 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 1936 1937 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1938 if (mumps->x_seq) { 1939 
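      /* buffers from a previous numeric factorization exist; free the cached scatter and the old
         local-solution storage here, since lsol_loc (INFO(23)) may have changed, and recreate them below */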
PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1940 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1941 PetscCall(VecDestroy(&mumps->x_seq)); 1942 } 1943 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1944 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1945 mumps->id.lsol_loc = lsol_loc; 1946 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1947 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1948 } 1949 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1950 PetscFunctionReturn(PETSC_SUCCESS); 1951 } 1952 1953 /* Sets MUMPS options from the options database */ 1954 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1955 { 1956 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1957 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1958 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1959 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1960 MumpsScalar *arr; 1961 1962 PetscFunctionBegin; 1963 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1964 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1965 PetscInt nthreads = 0; 1966 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1967 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1968 1969 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1970 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1971 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1972 1973 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1974 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1975 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1976 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1977 if (mumps->use_petsc_omp_support) { 1978 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 1979 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1980 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1981 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1982 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1983 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1984 #endif 1985 } else { 1986 mumps->omp_comm = PETSC_COMM_SELF; 1987 mumps->mumps_comm = mumps->petsc_comm; 1988 mumps->is_omp_master = PETSC_TRUE; 1989 } 1990 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1991 mumps->reqs = NULL; 1992 mumps->tag = 0; 1993 1994 if (mumps->mumps_comm != MPI_COMM_NULL) { 1995 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1996 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 1997 MPI_Comm comm; 1998 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 1999 mumps->mumps_comm = comm; 2000 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2001 } 2002 2003 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2004 mumps->id.job = JOB_INIT; 2005 mumps->id.par = 1; /* host participates factorizaton and solve */ 2006 mumps->id.sym = mumps->sym; 2007 2008 size = mumps->id.size_schur; 2009 arr = mumps->id.schur; 2010 listvar_schur = mumps->id.listvar_schur; 2011 PetscMUMPS_c(mumps); 2012 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 2013 /* restore cached ICNTL and CNTL values */ 2014 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2015 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2016 PetscCall(PetscFree(mumps->ICNTL_pre)); 2017 PetscCall(PetscFree(mumps->CNTL_pre)); 2018 2019 if (schur) { 2020 mumps->id.size_schur = size; 2021 mumps->id.schur_lld = size; 2022 mumps->id.schur = arr; 2023 mumps->id.listvar_schur = listvar_schur; 2024 if (mumps->petsc_size > 1) { 2025 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2026 2027 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2028 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2029 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2030 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2031 } else { 2032 if (F->factortype == MAT_FACTOR_LU) { 2033 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2034 } else { 2035 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2036 } 2037 } 2038 mumps->id.ICNTL(26) = -1; 2039 } 2040 2041 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2042 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
2043 */ 2044 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2045 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2046 2047 mumps->scat_rhs = NULL; 2048 mumps->scat_sol = NULL; 2049 2050 /* set PETSc-MUMPS default options - override MUMPS default */ 2051 mumps->id.ICNTL(3) = 0; 2052 mumps->id.ICNTL(4) = 0; 2053 if (mumps->petsc_size == 1) { 2054 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2055 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2056 } else { 2057 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2058 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2059 } 2060 } 2061 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2062 if (flg) mumps->id.ICNTL(1) = icntl; 2063 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2064 if (flg) mumps->id.ICNTL(2) = icntl; 2065 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2066 if (flg) mumps->id.ICNTL(3) = icntl; 2067 2068 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2069 if (flg) mumps->id.ICNTL(4) = icntl; 2070 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2071 2072 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2073 if (flg) mumps->id.ICNTL(6) = icntl; 2074 2075 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2076 if (flg) { 2077 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2078 mumps->id.ICNTL(7) = icntl; 2079 } 2080 2081 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2082 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2083 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2084 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2085 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2086 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2087 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2088 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2089 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2090 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2091 if (flg) { 2092 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2093 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2094 } 2095 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2096 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2097 PetscCall(MatDestroy(&F->schur)); 2098 PetscCall(MatMumpsResetSchur_Private(mumps)); 2099 } 2100 2101 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2102 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2103 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2104 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2105 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2106 In short, we could not use distributed RHS with MPICH until v4.0b1. 
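If distributed-RHS problems are suspected with a given MPI stack, the centralized path can still be
forced at runtime with -mat_mumps_icntl_20 0.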
2107 */ 2108 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2109 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 2110 #else 2111 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 2112 #endif 2113 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2114 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2115 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2116 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2117 #endif 2118 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2119 2120 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2121 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2122 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2123 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2124 2125 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2126 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2127 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2128 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2129 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2130 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2131 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2132 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2133 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", 
mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2134 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2135 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2136 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2137 2138 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2139 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2140 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2141 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2142 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2143 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2144 2145 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2146 2147 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2148 if (ninfo) { 2149 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2150 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2151 mumps->ninfo = ninfo; 2152 for (i = 0; i < ninfo; i++) { 2153 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2154 mumps->info[i] = info[i]; 2155 } 2156 } 2157 PetscOptionsEnd(); 2158 PetscFunctionReturn(PETSC_SUCCESS); 2159 } 2160 2161 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2162 { 2163 PetscFunctionBegin; 2164 if (mumps->id.INFOG(1) < 0) { 2165 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1)); 2166 if (mumps->id.INFOG(1) == -6) { 2167 PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2168 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2169 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2170 PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2171 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2172 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2173 PetscCall(PetscInfo(F, "Empty matrix\n")); 2174 } else { 2175 PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2176 F->factorerrortype = MAT_FACTOR_OTHER; 2177 } 2178 } 2179 PetscFunctionReturn(PETSC_SUCCESS); 2180 } 2181 2182 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS 
r, IS c, const MatFactorInfo *info) 2183 { 2184 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2185 Vec b; 2186 const PetscInt M = A->rmap->N; 2187 2188 PetscFunctionBegin; 2189 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2190 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2191 PetscFunctionReturn(PETSC_SUCCESS); 2192 } 2193 2194 /* Set MUMPS options from the options database */ 2195 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2196 2197 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2198 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2199 2200 /* analysis phase */ 2201 mumps->id.job = JOB_FACTSYMBOLIC; 2202 mumps->id.n = M; 2203 switch (mumps->id.ICNTL(18)) { 2204 case 0: /* centralized assembled matrix input */ 2205 if (!mumps->myid) { 2206 mumps->id.nnz = mumps->nnz; 2207 mumps->id.irn = mumps->irn; 2208 mumps->id.jcn = mumps->jcn; 2209 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2210 if (r) { 2211 mumps->id.ICNTL(7) = 1; 2212 if (!mumps->myid) { 2213 const PetscInt *idx; 2214 PetscInt i; 2215 2216 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2217 PetscCall(ISGetIndices(r, &idx)); 2218 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2219 PetscCall(ISRestoreIndices(r, &idx)); 2220 } 2221 } 2222 } 2223 break; 2224 case 3: /* distributed assembled matrix input (size>1) */ 2225 mumps->id.nnz_loc = mumps->nnz; 2226 mumps->id.irn_loc = mumps->irn; 2227 mumps->id.jcn_loc = mumps->jcn; 2228 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2229 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2230 PetscCall(MatCreateVecs(A, NULL, &b)); 2231 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2232 PetscCall(VecDestroy(&b)); 2233 } 2234 break; 2235 } 2236 PetscMUMPS_c(mumps); 2237 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2238 2239 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2240 F->ops->solve = MatSolve_MUMPS; 2241 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2242 F->ops->matsolve = MatMatSolve_MUMPS; 2243 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2244 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2245 2246 mumps->matstruc = SAME_NONZERO_PATTERN; 2247 PetscFunctionReturn(PETSC_SUCCESS); 2248 } 2249 2250 /* Note the Petsc r and c permutations are ignored */ 2251 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2252 { 2253 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2254 Vec b; 2255 const PetscInt M = A->rmap->N; 2256 2257 PetscFunctionBegin; 2258 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2259 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2260 PetscFunctionReturn(PETSC_SUCCESS); 2261 } 2262 2263 /* Set MUMPS options from the options database */ 2264 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2265 2266 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2267 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2268 2269 /* analysis phase */ 2270 mumps->id.job = JOB_FACTSYMBOLIC; 2271 mumps->id.n = M; 2272 switch (mumps->id.ICNTL(18)) { 2273 case 0: /* centralized assembled matrix input */ 2274 if (!mumps->myid) { 2275 mumps->id.nnz = mumps->nnz; 2276 mumps->id.irn = mumps->irn; 2277 mumps->id.jcn = mumps->jcn; 2278 if (mumps->id.ICNTL(6) > 1) mumps->id.a 
= (MumpsScalar *)mumps->val; 2279 } 2280 break; 2281 case 3: /* distributed assembled matrix input (size>1) */ 2282 mumps->id.nnz_loc = mumps->nnz; 2283 mumps->id.irn_loc = mumps->irn; 2284 mumps->id.jcn_loc = mumps->jcn; 2285 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2286 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2287 PetscCall(MatCreateVecs(A, NULL, &b)); 2288 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2289 PetscCall(VecDestroy(&b)); 2290 } 2291 break; 2292 } 2293 PetscMUMPS_c(mumps); 2294 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2295 2296 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2297 F->ops->solve = MatSolve_MUMPS; 2298 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2299 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2300 2301 mumps->matstruc = SAME_NONZERO_PATTERN; 2302 PetscFunctionReturn(PETSC_SUCCESS); 2303 } 2304 2305 /* Note the Petsc r permutation and factor info are ignored */ 2306 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2307 { 2308 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2309 Vec b; 2310 const PetscInt M = A->rmap->N; 2311 2312 PetscFunctionBegin; 2313 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2314 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2315 PetscFunctionReturn(PETSC_SUCCESS); 2316 } 2317 2318 /* Set MUMPS options from the options database */ 2319 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2320 2321 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2322 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2323 2324 /* analysis phase */ 2325 mumps->id.job = JOB_FACTSYMBOLIC; 2326 mumps->id.n = M; 2327 switch (mumps->id.ICNTL(18)) { 2328 case 0: /* centralized assembled matrix input */ 2329 if (!mumps->myid) { 2330 mumps->id.nnz = mumps->nnz; 2331 mumps->id.irn = mumps->irn; 2332 mumps->id.jcn = mumps->jcn; 2333 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2334 } 2335 break; 2336 case 3: /* distributed assembled matrix input (size>1) */ 2337 mumps->id.nnz_loc = mumps->nnz; 2338 mumps->id.irn_loc = mumps->irn; 2339 mumps->id.jcn_loc = mumps->jcn; 2340 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2341 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2342 PetscCall(MatCreateVecs(A, NULL, &b)); 2343 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2344 PetscCall(VecDestroy(&b)); 2345 } 2346 break; 2347 } 2348 PetscMUMPS_c(mumps); 2349 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2350 2351 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2352 F->ops->solve = MatSolve_MUMPS; 2353 F->ops->solvetranspose = MatSolve_MUMPS; 2354 F->ops->matsolve = MatMatSolve_MUMPS; 2355 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2356 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2357 #if defined(PETSC_USE_COMPLEX) 2358 F->ops->getinertia = NULL; 2359 #else 2360 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2361 #endif 2362 2363 mumps->matstruc = SAME_NONZERO_PATTERN; 2364 PetscFunctionReturn(PETSC_SUCCESS); 2365 } 2366 2367 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2368 { 2369 PetscBool iascii; 2370 PetscViewerFormat format; 2371 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2372 2373 PetscFunctionBegin; 2374 /* check if matrix is mumps type */ 2375 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2376 2377 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2378 if (iascii) { 2379 PetscCall(PetscViewerGetFormat(viewer, &format)); 2380 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2381 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2382 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2383 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2384 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2385 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2386 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2387 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2388 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2389 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2390 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2391 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2392 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2393 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2394 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2395 if (mumps->id.ICNTL(11) > 0) { 2396 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2397 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2398 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2399 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2400 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2401 PetscCall(PetscViewerASCIIPrintf(viewer, " 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2402 } 2403 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2404 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2405 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2406 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2407 /* ICNTL(15-17) not used */ 2408 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2409 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2410 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2411 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2412 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2413 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2414 2415 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2416 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2417 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2418 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2419 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2420 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2421 2422 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2423 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2424 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2425 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2426 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2427 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2428 2429 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2430 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2431 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2432 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2433 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2434 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2435 2436 /* information local to 
each processor */ 2437 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2438 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2439 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2440 PetscCall(PetscViewerFlush(viewer)); 2441 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2442 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2443 PetscCall(PetscViewerFlush(viewer)); 2444 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2445 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2446 PetscCall(PetscViewerFlush(viewer)); 2447 2448 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2449 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2450 PetscCall(PetscViewerFlush(viewer)); 2451 2452 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2453 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2454 PetscCall(PetscViewerFlush(viewer)); 2455 2456 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2457 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2458 PetscCall(PetscViewerFlush(viewer)); 2459 2460 if (mumps->ninfo && mumps->ninfo <= 80) { 2461 PetscInt i; 2462 for (i = 0; i < mumps->ninfo; i++) { 2463 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2464 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2465 PetscCall(PetscViewerFlush(viewer)); 2466 } 2467 } 2468 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2469 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2470 2471 if (mumps->myid == 0) { /* information from the host */ 2472 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2473 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2474 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2475 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2476 2477 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2478 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2479 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2480 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2481 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2482 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2483 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2484 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2485 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2486 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2487 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2488 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2489 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2490 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2491 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2492 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2493 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2494 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 
2495 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2496 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2497 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2498 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2499 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2500 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2501 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2502 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2503 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2504 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2505 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2506 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2507 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2508 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2509 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2510 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2511 } 2512 } 2513 } 2514 PetscFunctionReturn(PETSC_SUCCESS); 2515 } 2516 2517 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2518 { 2519 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2520 2521 PetscFunctionBegin; 2522 info->block_size = 1.0; 2523 info->nz_allocated = mumps->id.INFOG(20); 2524 info->nz_used = mumps->id.INFOG(20); 2525 info->nz_unneeded = 0.0; 2526 info->assemblies = 0.0; 2527 info->mallocs = 0.0; 2528 info->memory = 0.0; 2529 info->fill_ratio_given = 0; 2530 info->fill_ratio_needed = 0; 2531 info->factor_mallocs = 0; 2532 PetscFunctionReturn(PETSC_SUCCESS); 2533 } 2534 2535 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2536 { 2537 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2538 const PetscScalar *arr; 2539 const PetscInt *idxs; 2540 
PetscInt size, i; 2541 2542 PetscFunctionBegin; 2543 PetscCall(ISGetLocalSize(is, &size)); 2544 /* Schur complement matrix */ 2545 PetscCall(MatDestroy(&F->schur)); 2546 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2547 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2548 mumps->id.schur = (MumpsScalar *)arr; 2549 mumps->id.size_schur = size; 2550 mumps->id.schur_lld = size; 2551 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2552 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2553 2554 /* MUMPS expects Fortran style indices */ 2555 PetscCall(PetscFree(mumps->id.listvar_schur)); 2556 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2557 PetscCall(ISGetIndices(is, &idxs)); 2558 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2559 PetscCall(ISRestoreIndices(is, &idxs)); 2560 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 2561 mumps->id.ICNTL(26) = -1; 2562 PetscFunctionReturn(PETSC_SUCCESS); 2563 } 2564 2565 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2566 { 2567 Mat St; 2568 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2569 PetscScalar *array; 2570 #if defined(PETSC_USE_COMPLEX) 2571 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2572 #endif 2573 2574 PetscFunctionBegin; 2575 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2576 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2577 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2578 PetscCall(MatSetType(St, MATDENSE)); 2579 PetscCall(MatSetUp(St)); 2580 PetscCall(MatDenseGetArray(St, &array)); 2581 if (!mumps->sym) { /* MUMPS always returns a full matrix */ 2582 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2583 PetscInt i, j, N = mumps->id.size_schur; 2584 for (i = 0; i < N; i++) { 2585 for (j = 0; j < N; j++) { 2586 #if !defined(PETSC_USE_COMPLEX) 2587 PetscScalar val = mumps->id.schur[i * N + j]; 2588 #else 2589 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2590 #endif 2591 array[j * N + i] = val; 2592 } 2593 } 2594 } else { /* stored by columns */ 2595 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2596 } 2597 } else { /* either full or lower-triangular (not packed) */ 2598 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2599 PetscInt i, j, N = mumps->id.size_schur; 2600 for (i = 0; i < N; i++) { 2601 for (j = i; j < N; j++) { 2602 #if !defined(PETSC_USE_COMPLEX) 2603 PetscScalar val = mumps->id.schur[i * N + j]; 2604 #else 2605 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2606 #endif 2607 array[i * N + j] = val; 2608 array[j * N + i] = val; 2609 } 2610 } 2611 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2612 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2613 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2614 PetscInt i, j, N = mumps->id.size_schur; 2615 for (i = 0; i < N; i++) { 2616 for (j = 0; j < i + 1; j++) { 2617 #if !defined(PETSC_USE_COMPLEX) 2618 PetscScalar val = mumps->id.schur[i * N + j]; 2619 #else 2620 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2621 #endif 2622 array[i * N + j] = val;
2623 array[j * N + i] = val; 2624 } 2625 } 2626 } 2627 } 2628 PetscCall(MatDenseRestoreArray(St, &array)); 2629 *S = St; 2630 PetscFunctionReturn(PETSC_SUCCESS); 2631 } 2632 2633 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2634 { 2635 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2636 2637 PetscFunctionBegin; 2638 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2639 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2640 for (i = 0; i < nICNTL_pre; ++i) 2641 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2642 if (i == nICNTL_pre) { /* not already cached */ 2643 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2644 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2645 mumps->ICNTL_pre[0]++; 2646 } 2647 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2648 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2649 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2650 PetscFunctionReturn(PETSC_SUCCESS); 2651 } 2652 2653 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2654 { 2655 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2656 2657 PetscFunctionBegin; 2658 if (mumps->id.job == JOB_NULL) { 2659 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2660 *ival = 0; 2661 for (i = 0; i < nICNTL_pre; ++i) { 2662 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2663 } 2664 } else *ival = mumps->id.ICNTL(icntl); 2665 PetscFunctionReturn(PETSC_SUCCESS); 2666 } 2667 2668 /*@ 2669 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2670 2671 Logically Collective 2672 2673 Input Parameters: 2674 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2675 . icntl - index of MUMPS parameter array ICNTL() 2676 - ival - value of MUMPS ICNTL(icntl) 2677 2678 Options Database Key: 2679 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2680 2681 Level: beginner 2682 2683 References: 2684 . * - MUMPS Users' Guide 2685 2686 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2687 @*/ 2688 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2689 { 2690 PetscFunctionBegin; 2691 PetscValidType(F, 1); 2692 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2693 PetscValidLogicalCollectiveInt(F, icntl, 2); 2694 PetscValidLogicalCollectiveInt(F, ival, 3); 2695 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2696 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2697 PetscFunctionReturn(PETSC_SUCCESS); 2698 } 2699 2700 /*@ 2701 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2702 2703 Logically Collective 2704 2705 Input Parameters: 2706 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2707 - icntl - index of MUMPS parameter array ICNTL() 2708 2709 Output Parameter: 2710 . ival - value of MUMPS ICNTL(icntl) 2711 2712 Level: beginner 2713 2714 References: 2715 . 
* - MUMPS Users' Guide 2716 2717 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2718 @*/ 2719 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2720 { 2721 PetscFunctionBegin; 2722 PetscValidType(F, 1); 2723 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2724 PetscValidLogicalCollectiveInt(F, icntl, 2); 2725 PetscValidIntPointer(ival, 3); 2726 PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2727 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2728 PetscFunctionReturn(PETSC_SUCCESS); 2729 } 2730 2731 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2732 { 2733 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2734 2735 PetscFunctionBegin; 2736 if (mumps->id.job == JOB_NULL) { 2737 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2738 for (i = 0; i < nCNTL_pre; ++i) 2739 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2740 if (i == nCNTL_pre) { 2741 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2742 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2743 mumps->CNTL_pre[0]++; 2744 } 2745 mumps->CNTL_pre[1 + 2 * i] = icntl; 2746 mumps->CNTL_pre[2 + 2 * i] = val; 2747 } else mumps->id.CNTL(icntl) = val; 2748 PetscFunctionReturn(PETSC_SUCCESS); 2749 } 2750 2751 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2752 { 2753 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2754 2755 PetscFunctionBegin; 2756 if (mumps->id.job == JOB_NULL) { 2757 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2758 *val = 0.0; 2759 for (i = 0; i < nCNTL_pre; ++i) { 2760 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2761 } 2762 } else *val = mumps->id.CNTL(icntl); 2763 PetscFunctionReturn(PETSC_SUCCESS); 2764 } 2765 2766 /*@ 2767 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2768 2769 Logically Collective 2770 2771 Input Parameters: 2772 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2773 . icntl - index of MUMPS parameter array CNTL() 2774 - val - value of MUMPS CNTL(icntl) 2775 2776 Options Database Key: 2777 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2778 2779 Level: beginner 2780 2781 References: 2782 . 
* - MUMPS Users' Guide 2783 2784 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2785 @*/ 2786 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2787 { 2788 PetscFunctionBegin; 2789 PetscValidType(F, 1); 2790 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2791 PetscValidLogicalCollectiveInt(F, icntl, 2); 2792 PetscValidLogicalCollectiveReal(F, val, 3); 2793 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2794 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2795 PetscFunctionReturn(PETSC_SUCCESS); 2796 } 2797 2798 /*@ 2799 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2800 2801 Logically Collective 2802 2803 Input Parameters: 2804 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2805 - icntl - index of MUMPS parameter array CNTL() 2806 2807 Output Parameter: 2808 . val - value of MUMPS CNTL(icntl) 2809 2810 Level: beginner 2811 2812 References: 2813 . * - MUMPS Users' Guide 2814 2815 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2816 @*/ 2817 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2818 { 2819 PetscFunctionBegin; 2820 PetscValidType(F, 1); 2821 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2822 PetscValidLogicalCollectiveInt(F, icntl, 2); 2823 PetscValidRealPointer(val, 3); 2824 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2825 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2826 PetscFunctionReturn(PETSC_SUCCESS); 2827 } 2828 2829 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2830 { 2831 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2832 2833 PetscFunctionBegin; 2834 *info = mumps->id.INFO(icntl); 2835 PetscFunctionReturn(PETSC_SUCCESS); 2836 } 2837 2838 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2839 { 2840 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2841 2842 PetscFunctionBegin; 2843 *infog = mumps->id.INFOG(icntl); 2844 PetscFunctionReturn(PETSC_SUCCESS); 2845 } 2846 2847 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2848 { 2849 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2850 2851 PetscFunctionBegin; 2852 *rinfo = mumps->id.RINFO(icntl); 2853 PetscFunctionReturn(PETSC_SUCCESS); 2854 } 2855 2856 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2857 { 2858 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2859 2860 PetscFunctionBegin; 2861 *rinfog = mumps->id.RINFOG(icntl); 2862 PetscFunctionReturn(PETSC_SUCCESS); 2863 } 2864 2865 PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2866 { 2867 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2868 2869 PetscFunctionBegin; 2870 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2871 *size = 0; 2872 *array = NULL; 2873 if 
(!mumps->myid) { 2874 *size = mumps->id.INFOG(28); 2875 PetscCall(PetscMalloc1(*size, array)); 2876 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2877 } 2878 PetscFunctionReturn(PETSC_SUCCESS); 2879 } 2880 2881 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2882 { 2883 Mat Bt = NULL, Btseq = NULL; 2884 PetscBool flg; 2885 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2886 PetscScalar *aa; 2887 PetscInt spnr, *ia, *ja, M, nrhs; 2888 2889 PetscFunctionBegin; 2890 PetscValidPointer(spRHS, 2); 2891 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2892 if (flg) { 2893 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2894 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2895 2896 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2897 2898 if (mumps->petsc_size > 1) { 2899 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2900 Btseq = b->A; 2901 } else { 2902 Btseq = Bt; 2903 } 2904 2905 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2906 mumps->id.nrhs = nrhs; 2907 mumps->id.lrhs = M; 2908 mumps->id.rhs = NULL; 2909 2910 if (!mumps->myid) { 2911 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2912 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2913 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2914 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2915 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2916 } else { 2917 mumps->id.irhs_ptr = NULL; 2918 mumps->id.irhs_sparse = NULL; 2919 mumps->id.nz_rhs = 0; 2920 mumps->id.rhs_sparse = NULL; 2921 } 2922 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2923 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2924 2925 /* solve phase */ 2926 mumps->id.job = JOB_SOLVE; 2927 PetscMUMPS_c(mumps); 2928 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 2929 2930 if (!mumps->myid) { 2931 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2932 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2933 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2934 } 2935 PetscFunctionReturn(PETSC_SUCCESS); 2936 } 2937 2938 /*@ 2939 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2940 2941 Logically Collective 2942 2943 Input Parameter: 2944 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2945 2946 Output Parameter: 2947 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2948 2949 Level: beginner 2950 2951 References: 2952 . 
* - MUMPS Users' Guide 2953 2954 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2955 @*/ 2956 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2957 { 2958 PetscFunctionBegin; 2959 PetscValidType(F, 1); 2960 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2961 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2962 PetscFunctionReturn(PETSC_SUCCESS); 2963 } 2964 2965 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2966 { 2967 Mat spRHS; 2968 2969 PetscFunctionBegin; 2970 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 2971 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 2972 PetscCall(MatDestroy(&spRHS)); 2973 PetscFunctionReturn(PETSC_SUCCESS); 2974 } 2975 2976 /*@ 2977 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 2978 2979 Logically Collective 2980 2981 Input Parameter: 2982 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2983 2984 Output Parameter: 2985 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 2986 2987 Level: beginner 2988 2989 References: 2990 . * - MUMPS Users' Guide 2991 2992 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2993 @*/ 2994 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 2995 { 2996 PetscBool flg; 2997 2998 PetscFunctionBegin; 2999 PetscValidType(F, 1); 3000 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3001 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3002 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3003 3004 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3005 PetscFunctionReturn(PETSC_SUCCESS); 3006 } 3007 3008 /*@ 3009 MatMumpsGetInfo - Get MUMPS parameter INFO() 3010 3011 Logically Collective 3012 3013 Input Parameters: 3014 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3015 - icntl - index of MUMPS parameter array INFO() 3016 3017 Output Parameter: 3018 . ival - value of MUMPS INFO(icntl) 3019 3020 Level: beginner 3021 3022 References: 3023 . * - MUMPS Users' Guide 3024 3025 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3026 @*/ 3027 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3028 { 3029 PetscFunctionBegin; 3030 PetscValidType(F, 1); 3031 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3032 PetscValidIntPointer(ival, 3); 3033 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3034 PetscFunctionReturn(PETSC_SUCCESS); 3035 } 3036 3037 /*@ 3038 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3039 3040 Logically Collective 3041 3042 Input Parameters: 3043 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3044 - icntl - index of MUMPS parameter array INFOG() 3045 3046 Output Parameter: 3047 . ival - value of MUMPS INFOG(icntl) 3048 3049 Level: beginner 3050 3051 References: 3052 . 
* - MUMPS Users' Guide 3053 3054 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3055 @*/ 3056 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3057 { 3058 PetscFunctionBegin; 3059 PetscValidType(F, 1); 3060 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3061 PetscValidIntPointer(ival, 3); 3062 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3063 PetscFunctionReturn(PETSC_SUCCESS); 3064 } 3065 3066 /*@ 3067 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3068 3069 Logically Collective 3070 3071 Input Parameters: 3072 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3073 - icntl - index of MUMPS parameter array RINFO() 3074 3075 Output Parameter: 3076 . val - value of MUMPS RINFO(icntl) 3077 3078 Level: beginner 3079 3080 References: 3081 . * - MUMPS Users' Guide 3082 3083 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3084 @*/ 3085 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3086 { 3087 PetscFunctionBegin; 3088 PetscValidType(F, 1); 3089 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3090 PetscValidRealPointer(val, 3); 3091 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3092 PetscFunctionReturn(PETSC_SUCCESS); 3093 } 3094 3095 /*@ 3096 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3097 3098 Logically Collective 3099 3100 Input Parameters: 3101 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3102 - icntl - index of MUMPS parameter array RINFOG() 3103 3104 Output Parameter: 3105 . val - value of MUMPS RINFOG(icntl) 3106 3107 Level: beginner 3108 3109 References: 3110 . * - MUMPS Users' Guide 3111 3112 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3113 @*/ 3114 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3115 { 3116 PetscFunctionBegin; 3117 PetscValidType(F, 1); 3118 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3119 PetscValidRealPointer(val, 3); 3120 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3121 PetscFunctionReturn(PETSC_SUCCESS); 3122 } 3123 3124 /*@ 3125 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3126 3127 Logically Collective 3128 3129 Input Parameter: 3130 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3131 3132 Output Parameters: 3133 + size - local size of the array. The size of the array is non-zero only on the host. 3134 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3135 for freeing this array. 3136 3137 Level: beginner 3138 3139 References: 3140 . 
* - MUMPS Users' Guide 3141 3142 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3143 @*/ 3144 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3145 { 3146 PetscFunctionBegin; 3147 PetscValidType(F, 1); 3148 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3149 PetscValidIntPointer(size, 3); 3150 PetscValidPointer(array, 4); 3151 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3152 PetscFunctionReturn(PETSC_SUCCESS); 3153 } 3154 3155 /*MC 3156 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3157 distributed and sequential matrices via the external package MUMPS. 3158 3159 Works with `MATAIJ` and `MATSBAIJ` matrices 3160 3161 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3162 3163 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3164 See details below. 3165 3166 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3167 3168 Options Database Keys: 3169 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3170 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 3171 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3172 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3173 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 3174 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3175 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3176 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3177 . -mat_mumps_icntl_10 - ICNTL(10): max number of iterative refinements 3178 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3179 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3180 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3181 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3182 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3183 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3184 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3185 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3186 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor 3187 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3188 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3189 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 3190 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 3191 .
-mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3192 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3193 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3194 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3195 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3196 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3197 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3198 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3199 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3200 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3201 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3202 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3203 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3204 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3205 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3206 3207 Level: beginner 3208 3209 Notes: 3210 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc), so using it will 3211 error if the matrix is Hermitian. 3212 3213 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3214 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3215 3216 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3217 the failure with 3218 .vb 3219 KSPGetPC(ksp,&pc); 3220 PCFactorGetMatrix(pc,&mat); 3221 MatMumpsGetInfo(mat,....); 3222 MatMumpsGetInfog(mat,....); etc. 3223 .ve 3224 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3225 3226 MUMPS provides 64-bit integer support in two build modes: 3227 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3228 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3229 3230 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3231 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and 3232 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3233 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3234 3235 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3236 3237 Two modes to run MUMPS/PETSc with OpenMP 3238 .vb 3239 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores.
For example, if you want to have 16 OpenMP 3240 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3241 .ve 3242 3243 .vb 3244 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3245 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3246 .ve 3247 3248 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3249 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3250 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3251 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3252 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 3253 3254 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3255 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3256 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3257 are called slave ranks (or slaves). Only master ranks are seen by MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set 3258 by CPU binding policies in job scripts) and make them available to the master so that the OMP threads spawned by MUMPS can run on them. 3259 On a multi-socket compute node, MPI rank mapping is an issue. Still using the above example, suppose your compute node has two sockets: 3260 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind 3261 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and the threads it spawns) will use half the cores in socket 0 and half 3262 the cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3263 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3264 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3265 examine the mapping result. 3266 3267 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3268 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3269 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3270 3271 References: 3272 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3273 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
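   Example Usage:
   A minimal sketch of selecting this solver from code and adjusting a MUMPS control parameter before the factorization. It assumes a `KSP` named ksp whose operator has already been set, with the variables pc, F, b, x declared elsewhere, and omits `PetscCall()` error checking
.vb
     KSPGetPC(ksp, &pc);
     PCSetType(pc, PCLU);
     PCFactorSetMatSolverType(pc, MATSOLVERMUMPS);
     PCFactorSetUpMatSolverType(pc);
     PCFactorGetMatrix(pc, &F);
     MatMumpsSetIcntl(F, 14, 30);
     KSPSolve(ksp, b, x);
.ve
   `PCFactorSetUpMatSolverType()` creates the factor matrix so that MUMPS options can be set on it before the numerical factorization; here ICNTL(14) increases the estimated working space by 30 percent. The same effect can be obtained from the options database with -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30.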
3274 3275 .seealso: [](chapter_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3276 M*/ 3277 3278 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3279 { 3280 PetscFunctionBegin; 3281 *type = MATSOLVERMUMPS; 3282 PetscFunctionReturn(PETSC_SUCCESS); 3283 } 3284 3285 /* MatGetFactor for Seq and MPI AIJ matrices */ 3286 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3287 { 3288 Mat B; 3289 Mat_MUMPS *mumps; 3290 PetscBool isSeqAIJ; 3291 PetscMPIInt size; 3292 3293 PetscFunctionBegin; 3294 #if defined(PETSC_USE_COMPLEX) 3295 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3296 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3297 *F = NULL; 3298 PetscFunctionReturn(PETSC_SUCCESS); 3299 } 3300 #endif 3301 /* Create the factorization matrix */ 3302 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3303 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3304 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3305 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3306 PetscCall(MatSetUp(B)); 3307 3308 PetscCall(PetscNew(&mumps)); 3309 3310 B->ops->view = MatView_MUMPS; 3311 B->ops->getinfo = MatGetInfo_MUMPS; 3312 3313 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3314 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3315 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3316 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3317 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3318 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3319 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3320 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3321 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3322 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3323 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3324 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3325 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3326 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3327 3328 if (ftype == MAT_FACTOR_LU) { 3329 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3330 B->factortype = MAT_FACTOR_LU; 3331 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3332 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3333 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char 
**)&B->preferredordering[MAT_FACTOR_LU])); 3334 mumps->sym = 0; 3335 } else { 3336 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3337 B->factortype = MAT_FACTOR_CHOLESKY; 3338 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3339 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3340 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3341 #if defined(PETSC_USE_COMPLEX) 3342 mumps->sym = 2; 3343 #else 3344 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3345 else mumps->sym = 2; 3346 #endif 3347 } 3348 3349 /* set solvertype */ 3350 PetscCall(PetscFree(B->solvertype)); 3351 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3352 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3353 if (size == 1) { 3354 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3355 B->canuseordering = PETSC_TRUE; 3356 } 3357 B->ops->destroy = MatDestroy_MUMPS; 3358 B->data = (void *)mumps; 3359 3360 *F = B; 3361 mumps->id.job = JOB_NULL; 3362 mumps->ICNTL_pre = NULL; 3363 mumps->CNTL_pre = NULL; 3364 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3365 PetscFunctionReturn(PETSC_SUCCESS); 3366 } 3367 3368 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3369 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3370 { 3371 Mat B; 3372 Mat_MUMPS *mumps; 3373 PetscBool isSeqSBAIJ; 3374 PetscMPIInt size; 3375 3376 PetscFunctionBegin; 3377 #if defined(PETSC_USE_COMPLEX) 3378 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3379 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3380 *F = NULL; 3381 PetscFunctionReturn(PETSC_SUCCESS); 3382 } 3383 #endif 3384 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3385 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3386 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3387 PetscCall(MatSetUp(B)); 3388 3389 PetscCall(PetscNew(&mumps)); 3390 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3391 if (isSeqSBAIJ) { 3392 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3393 } else { 3394 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3395 } 3396 3397 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3398 B->ops->view = MatView_MUMPS; 3399 B->ops->getinfo = MatGetInfo_MUMPS; 3400 3401 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3402 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3403 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3404 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3405 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3406 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3407 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3408 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3409 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3410 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3411 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3412 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3413 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3414 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3415 3416 B->factortype = MAT_FACTOR_CHOLESKY; 3417 #if defined(PETSC_USE_COMPLEX) 3418 mumps->sym = 2; 3419 #else 3420 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3421 else mumps->sym = 2; 3422 #endif 3423 3424 /* set solvertype */ 3425 PetscCall(PetscFree(B->solvertype)); 3426 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3427 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3428 if (size == 1) { 3429 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3430 B->canuseordering = PETSC_TRUE; 3431 } 3432 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3433 B->ops->destroy = MatDestroy_MUMPS; 3434 B->data = (void *)mumps; 3435 3436 *F = B; 3437 mumps->id.job = JOB_NULL; 3438 mumps->ICNTL_pre = NULL; 3439 mumps->CNTL_pre = NULL; 3440 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3441 PetscFunctionReturn(PETSC_SUCCESS); 3442 } 3443 3444 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3445 { 3446 Mat B; 3447 Mat_MUMPS *mumps; 3448 PetscBool isSeqBAIJ; 3449 PetscMPIInt size; 
3450 3451 PetscFunctionBegin; 3452 /* Create the factorization matrix */ 3453 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3454 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3455 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3456 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3457 PetscCall(MatSetUp(B)); 3458 3459 PetscCall(PetscNew(&mumps)); 3460 if (ftype == MAT_FACTOR_LU) { 3461 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3462 B->factortype = MAT_FACTOR_LU; 3463 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3464 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3465 mumps->sym = 0; 3466 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3467 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3468 3469 B->ops->view = MatView_MUMPS; 3470 B->ops->getinfo = MatGetInfo_MUMPS; 3471 3472 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3473 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3474 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3475 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3476 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3477 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3478 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3479 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3480 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3481 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3482 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3483 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3484 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3485 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3486 3487 /* set solvertype */ 3488 PetscCall(PetscFree(B->solvertype)); 3489 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3490 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3491 if (size == 1) { 3492 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3493 B->canuseordering = PETSC_TRUE; 3494 } 3495 B->ops->destroy = MatDestroy_MUMPS; 3496 B->data = (void *)mumps; 3497 3498 *F = B; 3499 mumps->id.job = JOB_NULL; 3500 mumps->ICNTL_pre = NULL; 3501 mumps->CNTL_pre = NULL; 3502 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3503 PetscFunctionReturn(PETSC_SUCCESS); 3504 } 3505 3506 /* MatGetFactor for Seq and MPI SELL matrices */ 3507 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3508 { 3509 Mat B; 3510 Mat_MUMPS *mumps; 3511 PetscBool isSeqSELL; 3512 PetscMPIInt size; 3513 
3514 PetscFunctionBegin; 3515 /* Create the factorization matrix */ 3516 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3517 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3518 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3519 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3520 PetscCall(MatSetUp(B)); 3521 3522 PetscCall(PetscNew(&mumps)); 3523 3524 B->ops->view = MatView_MUMPS; 3525 B->ops->getinfo = MatGetInfo_MUMPS; 3526 3527 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3528 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3529 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3530 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3531 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3532 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3533 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3534 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3535 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3536 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3537 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3538 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3539 3540 if (ftype == MAT_FACTOR_LU) { 3541 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3542 B->factortype = MAT_FACTOR_LU; 3543 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3544 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3545 mumps->sym = 0; 3546 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3547 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3548 3549 /* set solvertype */ 3550 PetscCall(PetscFree(B->solvertype)); 3551 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3552 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3553 if (size == 1) { 3554 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3555 B->canuseordering = PETSC_TRUE; 3556 } 3557 B->ops->destroy = MatDestroy_MUMPS; 3558 B->data = (void *)mumps; 3559 3560 *F = B; 3561 mumps->id.job = JOB_NULL; 3562 mumps->ICNTL_pre = NULL; 3563 mumps->CNTL_pre = NULL; 3564 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3565 PetscFunctionReturn(PETSC_SUCCESS); 3566 } 3567 3568 /* MatGetFactor for MATNEST matrices */ 3569 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3570 { 3571 Mat B, **mats; 3572 Mat_MUMPS *mumps; 3573 PetscInt nr, nc; 3574 PetscMPIInt size; 3575 PetscBool flg = PETSC_TRUE; 3576 3577 PetscFunctionBegin; 3578 #if defined(PETSC_USE_COMPLEX) 3579 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3580 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not 
supported. Use MAT_FACTOR_LU instead.\n")); 3581 *F = NULL; 3582 PetscFunctionReturn(PETSC_SUCCESS); 3583 } 3584 #endif 3585 3586 /* Return if some condition is not satisfied */ 3587 *F = NULL; 3588 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3589 if (ftype == MAT_FACTOR_CHOLESKY) { 3590 IS *rows, *cols; 3591 PetscInt *m, *M; 3592 3593 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3594 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3595 PetscCall(MatNestGetISs(A, rows, cols)); 3596 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3597 if (!flg) { 3598 PetscCall(PetscFree2(rows, cols)); 3599 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 3600 PetscFunctionReturn(PETSC_SUCCESS); 3601 } 3602 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3603 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3604 for (PetscInt r = 0; flg && r < nr; r++) 3605 for (PetscInt k = r + 1; flg && k < nr; k++) 3606 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3607 PetscCall(PetscFree2(m, M)); 3608 PetscCall(PetscFree2(rows, cols)); 3609 if (!flg) { 3610 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 3611 PetscFunctionReturn(PETSC_SUCCESS); 3612 } 3613 } 3614 3615 for (PetscInt r = 0; r < nr; r++) { 3616 for (PetscInt c = 0; c < nc; c++) { 3617 Mat sub = mats[r][c]; 3618 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans; 3619 3620 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3621 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 3622 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 3623 else { 3624 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 3625 if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 3626 } 3627 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3628 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3629 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3630 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3631 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3632 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3633 if (ftype == MAT_FACTOR_CHOLESKY) { 3634 if (r == c && !isSeqAIJ && !isMPIAIJ && !isSeqSBAIJ && !isMPISBAIJ) { 3635 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3636 flg = PETSC_FALSE; 3637 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3638 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3639 flg = PETSC_FALSE; 3640 } 3641 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3642 PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3643 flg = PETSC_FALSE; 3644 } 3645 } 3646 } 3647 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3648 3649 /* Create the factorization matrix */ 3650 
PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3651 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3652 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3653 PetscCall(MatSetUp(B)); 3654 3655 PetscCall(PetscNew(&mumps)); 3656 3657 B->ops->view = MatView_MUMPS; 3658 B->ops->getinfo = MatGetInfo_MUMPS; 3659 3660 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3661 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3662 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3663 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3664 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3665 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3666 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3667 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3668 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3669 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3670 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3671 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3672 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3673 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3674 3675 if (ftype == MAT_FACTOR_LU) { 3676 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3677 B->factortype = MAT_FACTOR_LU; 3678 mumps->sym = 0; 3679 } else { 3680 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3681 B->factortype = MAT_FACTOR_CHOLESKY; 3682 #if defined(PETSC_USE_COMPLEX) 3683 mumps->sym = 2; 3684 #else 3685 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3686 else mumps->sym = 2; 3687 #endif 3688 } 3689 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3690 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3691 3692 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3693 if (size == 1) { 3694 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3695 B->canuseordering = PETSC_TRUE; 3696 } 3697 3698 /* set solvertype */ 3699 PetscCall(PetscFree(B->solvertype)); 3700 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3701 B->ops->destroy = MatDestroy_MUMPS; 3702 B->data = (void *)mumps; 3703 3704 *F = B; 3705 mumps->id.job = JOB_NULL; 3706 mumps->ICNTL_pre = NULL; 3707 mumps->CNTL_pre = NULL; 3708 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3709 PetscFunctionReturn(PETSC_SUCCESS); 3710 } 3711 3712 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3713 { 3714 PetscFunctionBegin; 3715 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3716 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3717 
PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3718 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3719 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3720 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3721 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3722 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3723 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3724 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3725 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3726 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3727 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 3728 PetscFunctionReturn(PETSC_SUCCESS); 3729 } 3730
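/*
   Example (a minimal sketch, not part of the library): after a factorization performed with null pivot
   detection enabled (-mat_mumps_icntl_24 1), the rows with null pivots can be retrieved through the factor
   matrix. The sketch assumes a KSP named ksp whose PC uses MATSOLVERMUMPS, omits PetscCall() error checking,
   and the caller owns the array returned by MatMumpsGetNullPivots().

     Mat      F;
     PC       pc;
     PetscInt nnull, *rows;

     KSPGetPC(ksp, &pc);
     PCFactorGetMatrix(pc, &F);
     MatMumpsGetNullPivots(F, &nnull, &rows);   // nnull is nonzero only on the host (rank 0)
     for (PetscInt i = 0; i < nnull; i++) PetscPrintf(PETSC_COMM_SELF, "null pivot row %" PetscInt_FMT "\n", rows[i]);
     PetscFree(rows);
*/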