11c2a3de1SBarry Smith 2397b6df1SKris Buschelman /* 3c2b5dc30SHong Zhang Provides an interface to the MUMPS sparse solver 4397b6df1SKris Buschelman */ 567602552SJunchao Zhang #include <petscpkg_version.h> 69d0448ceSStefano Zampini #include <petscsf.h> 7c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 8c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 97ee00b23SStefano Zampini #include <../src/mat/impls/sell/mpi/mpisell.h> 10397b6df1SKris Buschelman 11397b6df1SKris Buschelman EXTERN_C_BEGIN 12397b6df1SKris Buschelman #if defined(PETSC_USE_COMPLEX) 132907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 142907cef9SHong Zhang #include <cmumps_c.h> 152907cef9SHong Zhang #else 16c6db04a5SJed Brown #include <zmumps_c.h> 172907cef9SHong Zhang #endif 182907cef9SHong Zhang #else 192907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 202907cef9SHong Zhang #include <smumps_c.h> 21397b6df1SKris Buschelman #else 22c6db04a5SJed Brown #include <dmumps_c.h> 23397b6df1SKris Buschelman #endif 242907cef9SHong Zhang #endif 25397b6df1SKris Buschelman EXTERN_C_END 26397b6df1SKris Buschelman #define JOB_INIT -1 27413bcc21SPierre Jolivet #define JOB_NULL 0 283d472b54SHong Zhang #define JOB_FACTSYMBOLIC 1 293d472b54SHong Zhang #define JOB_FACTNUMERIC 2 303d472b54SHong Zhang #define JOB_SOLVE 3 31397b6df1SKris Buschelman #define JOB_END -2 323d472b54SHong Zhang 332907cef9SHong Zhang /* calls to MUMPS */ 342907cef9SHong Zhang #if defined(PETSC_USE_COMPLEX) 352907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 363ab56b82SJunchao Zhang #define MUMPS_c cmumps_c 372907cef9SHong Zhang #else 383ab56b82SJunchao Zhang #define MUMPS_c zmumps_c 392907cef9SHong Zhang #endif 402907cef9SHong Zhang #else 412907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 423ab56b82SJunchao Zhang #define MUMPS_c smumps_c 432907cef9SHong Zhang #else 443ab56b82SJunchao Zhang #define MUMPS_c dmumps_c 452907cef9SHong Zhang #endif 462907cef9SHong Zhang #endif 
472907cef9SHong Zhang 48a6053eceSJunchao Zhang /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 49a6053eceSJunchao Zhang number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 50a6053eceSJunchao Zhang naming convention in PetscMPIInt, PetscBLASInt etc. 51a6053eceSJunchao Zhang */ 52a6053eceSJunchao Zhang typedef MUMPS_INT PetscMUMPSInt; 53a6053eceSJunchao Zhang 5467602552SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 5567602552SJunchao Zhang #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 56a6053eceSJunchao Zhang #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 5767602552SJunchao Zhang #endif 58a6053eceSJunchao Zhang #else 5967602552SJunchao Zhang #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 6067602552SJunchao Zhang #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 6167602552SJunchao Zhang #endif 6267602552SJunchao Zhang #endif 6367602552SJunchao Zhang 64a6053eceSJunchao Zhang #define MPIU_MUMPSINT MPI_INT 65a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MAX 2147483647 66a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MIN -2147483648 67a6053eceSJunchao Zhang 68a6053eceSJunchao Zhang /* Cast PetscInt to PetscMUMPSInt. 
Usually there is no overflow since <a> is row/col indices or some small integers*/ 69d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 70d71ae5a4SJacob Faibussowitsch { 71a6053eceSJunchao Zhang PetscFunctionBegin; 72ece88022SPierre Jolivet #if PetscDefined(USE_64BIT_INDICES) 732c71b3e2SJacob Faibussowitsch PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 74ece88022SPierre Jolivet #endif 75a6053eceSJunchao Zhang *b = (PetscMUMPSInt)(a); 763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 77a6053eceSJunchao Zhang } 78a6053eceSJunchao Zhang 79a6053eceSJunchao Zhang /* Put these utility routines here since they are only used in this file */ 80d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 81d71ae5a4SJacob Faibussowitsch { 82a6053eceSJunchao Zhang PetscInt myval; 83a6053eceSJunchao Zhang PetscBool myset; 84a6053eceSJunchao Zhang PetscFunctionBegin; 85a6053eceSJunchao Zhang /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 869566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 879566063dSJacob Faibussowitsch if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 88a6053eceSJunchao Zhang if (set) *set = myset; 893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 90a6053eceSJunchao Zhang } 91a6053eceSJunchao Zhang #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 92a6053eceSJunchao Zhang 93217d3b1eSJunchao Zhang /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 943ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 953ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 963ab56b82SJunchao Zhang do { \ 973ab56b82SJunchao Zhang if (mumps->use_petsc_omp_support) { \ 983ab56b82SJunchao Zhang if (mumps->is_omp_master) { \ 999566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 10014ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 10114ffdc6fSStefano Zampini PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 10214ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 1039566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 1043ab56b82SJunchao Zhang } \ 1059566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 106c3714a1dSJunchao Zhang /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 107c3714a1dSJunchao Zhang to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 108c3714a1dSJunchao Zhang an easy translation between omp_comm and petsc_comm). 
See MUMPS-5.1.2 manual p82. \ 109c3714a1dSJunchao Zhang omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 110c3714a1dSJunchao Zhang */ \ 111338d3105SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 112338d3105SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 113338d3105SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 114338d3105SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 1153ab56b82SJunchao Zhang } else { \ 11614ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 11714ffdc6fSStefano Zampini PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 11814ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 1193ab56b82SJunchao Zhang } \ 1203ab56b82SJunchao Zhang } while (0) 1213ab56b82SJunchao Zhang #else 1223ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 123d71ae5a4SJacob Faibussowitsch do { \ 12414ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 12514ffdc6fSStefano Zampini PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 12614ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 127d71ae5a4SJacob Faibussowitsch } while (0) 1283ab56b82SJunchao Zhang #endif 1293ab56b82SJunchao Zhang 130940cd9d6SSatish Balay /* declare MumpsScalar */ 131940cd9d6SSatish Balay #if defined(PETSC_USE_COMPLEX) 132940cd9d6SSatish Balay #if defined(PETSC_USE_REAL_SINGLE) 133940cd9d6SSatish Balay #define MumpsScalar mumps_complex 134940cd9d6SSatish Balay #else 135940cd9d6SSatish Balay #define MumpsScalar mumps_double_complex 136940cd9d6SSatish Balay #endif 137940cd9d6SSatish Balay #else 138940cd9d6SSatish 
Balay #define MumpsScalar PetscScalar 139940cd9d6SSatish Balay #endif 1403d472b54SHong Zhang 141397b6df1SKris Buschelman /* macros s.t. indices match MUMPS documentation */ 142397b6df1SKris Buschelman #define ICNTL(I) icntl[(I)-1] 143397b6df1SKris Buschelman #define CNTL(I) cntl[(I)-1] 144397b6df1SKris Buschelman #define INFOG(I) infog[(I)-1] 145a7aca84bSHong Zhang #define INFO(I) info[(I)-1] 146397b6df1SKris Buschelman #define RINFOG(I) rinfog[(I)-1] 147adc1d99fSHong Zhang #define RINFO(I) rinfo[(I)-1] 148397b6df1SKris Buschelman 149a6053eceSJunchao Zhang typedef struct Mat_MUMPS Mat_MUMPS; 150a6053eceSJunchao Zhang struct Mat_MUMPS { 151397b6df1SKris Buschelman #if defined(PETSC_USE_COMPLEX) 1522907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 1532907cef9SHong Zhang CMUMPS_STRUC_C id; 1542907cef9SHong Zhang #else 155397b6df1SKris Buschelman ZMUMPS_STRUC_C id; 1562907cef9SHong Zhang #endif 1572907cef9SHong Zhang #else 1582907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 1592907cef9SHong Zhang SMUMPS_STRUC_C id; 160397b6df1SKris Buschelman #else 161397b6df1SKris Buschelman DMUMPS_STRUC_C id; 162397b6df1SKris Buschelman #endif 1632907cef9SHong Zhang #endif 1642907cef9SHong Zhang 165397b6df1SKris Buschelman MatStructure matstruc; 1662d4298aeSJunchao Zhang PetscMPIInt myid, petsc_size; 167a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 168a6053eceSJunchao Zhang PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 169a6053eceSJunchao Zhang PetscInt64 nnz; /* number of nonzeros. 
The type is called selective 64-bit in mumps */ 170a6053eceSJunchao Zhang PetscMUMPSInt sym; 1712d4298aeSJunchao Zhang MPI_Comm mumps_comm; 172413bcc21SPierre Jolivet PetscMUMPSInt *ICNTL_pre; 173413bcc21SPierre Jolivet PetscReal *CNTL_pre; 174a6053eceSJunchao Zhang PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 175801fbe65SHong Zhang VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 17625aac85cSJunchao Zhang PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 17767602552SJunchao Zhang PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 17867602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 17967602552SJunchao Zhang PetscInt *rhs_nrow, max_nrhs; 18067602552SJunchao Zhang PetscMPIInt *rhs_recvcounts, *rhs_disps; 18167602552SJunchao Zhang PetscScalar *rhs_loc, *rhs_recvbuf; 18267602552SJunchao Zhang #endif 183801fbe65SHong Zhang Vec b_seq, x_seq; 184a6053eceSJunchao Zhang PetscInt ninfo, *info; /* which INFO to display */ 185b5fa320bSStefano Zampini PetscInt sizeredrhs; 18659ac8732SStefano Zampini PetscScalar *schur_sol; 18759ac8732SStefano Zampini PetscInt schur_sizesol; 188a6053eceSJunchao Zhang PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 189a6053eceSJunchao Zhang PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 190a6053eceSJunchao Zhang PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 1912205254eSKarl Rupp 1929d0448ceSStefano Zampini /* Support for MATNEST */ 1939d0448ceSStefano Zampini PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 1949d0448ceSStefano Zampini PetscInt64 *nest_vals_start; 1959d0448ceSStefano Zampini PetscScalar *nest_vals; 1969d0448ceSStefano Zampini 197a6053eceSJunchao Zhang /* stuff used by petsc/mumps OpenMP support*/ 1983ab56b82SJunchao Zhang PetscBool use_petsc_omp_support; 199da81f932SPierre Jolivet PetscOmpCtrl omp_ctrl; /* an OpenMP 
controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 2003ab56b82SJunchao Zhang MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 201a6053eceSJunchao Zhang PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 202a6053eceSJunchao Zhang PetscMPIInt tag, omp_comm_size; 2033ab56b82SJunchao Zhang PetscBool is_omp_master; /* is this rank the master of omp_comm */ 204a6053eceSJunchao Zhang MPI_Request *reqs; 205a6053eceSJunchao Zhang }; 2063ab56b82SJunchao Zhang 207a6053eceSJunchao Zhang /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 208a6053eceSJunchao Zhang Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 209a6053eceSJunchao Zhang */ 210d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 211d71ae5a4SJacob Faibussowitsch { 212a6053eceSJunchao Zhang PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 213f0c56d0fSKris Buschelman 214a6053eceSJunchao Zhang PetscFunctionBegin; 215a6053eceSJunchao Zhang #if defined(PETSC_USE_64BIT_INDICES) 216a6053eceSJunchao Zhang { 217a6053eceSJunchao Zhang PetscInt i; 218a6053eceSJunchao Zhang if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 2199566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 2209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 221a6053eceSJunchao Zhang mumps->cur_ilen = nrow + 1; 222a6053eceSJunchao Zhang } 223a6053eceSJunchao Zhang if (nnz > mumps->cur_jlen) { 2249566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 2259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 226a6053eceSJunchao Zhang mumps->cur_jlen = nnz; 227a6053eceSJunchao Zhang } 2289566063dSJacob Faibussowitsch for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 2299566063dSJacob Faibussowitsch for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 230a6053eceSJunchao Zhang *ia_mumps = mumps->ia_alloc; 231a6053eceSJunchao Zhang *ja_mumps = mumps->ja_alloc; 232a6053eceSJunchao Zhang } 233a6053eceSJunchao Zhang #else 234a6053eceSJunchao Zhang *ia_mumps = ia; 235a6053eceSJunchao Zhang *ja_mumps = ja; 236a6053eceSJunchao Zhang #endif 2379566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 2383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 239a6053eceSJunchao Zhang } 240b24902e0SBarry Smith 241d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 242d71ae5a4SJacob Faibussowitsch { 243b5fa320bSStefano Zampini PetscFunctionBegin; 2449566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.listvar_schur)); 2459566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 2469566063dSJacob 
Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 24759ac8732SStefano Zampini mumps->id.size_schur = 0; 248b3cb21ddSStefano Zampini mumps->id.schur_lld = 0; 24959ac8732SStefano Zampini mumps->id.ICNTL(19) = 0; 2503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25159ac8732SStefano Zampini } 25259ac8732SStefano Zampini 253b3cb21ddSStefano Zampini /* solve with rhs in mumps->id.redrhs and return in the same location */ 254d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 255d71ae5a4SJacob Faibussowitsch { 256b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 257b3cb21ddSStefano Zampini Mat S, B, X; 258b3cb21ddSStefano Zampini MatFactorSchurStatus schurstatus; 259b3cb21ddSStefano Zampini PetscInt sizesol; 26059ac8732SStefano Zampini 26159ac8732SStefano Zampini PetscFunctionBegin; 2629566063dSJacob Faibussowitsch PetscCall(MatFactorFactorizeSchurComplement(F)); 2639566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 2649566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 2659566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 266a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2679566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(B, S->boundtocpu)); 268a3d589ffSStefano Zampini #endif 269b3cb21ddSStefano Zampini switch (schurstatus) { 270d71ae5a4SJacob Faibussowitsch case MAT_FACTOR_SCHUR_FACTORED: 271d71ae5a4SJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 272d71ae5a4SJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 273a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2749566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, 
S->boundtocpu)); 275a3d589ffSStefano Zampini #endif 276b3cb21ddSStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 2779566063dSJacob Faibussowitsch PetscCall(MatMatSolveTranspose(S, B, X)); 27859ac8732SStefano Zampini } else { 2799566063dSJacob Faibussowitsch PetscCall(MatMatSolve(S, B, X)); 28059ac8732SStefano Zampini } 281b3cb21ddSStefano Zampini break; 282b3cb21ddSStefano Zampini case MAT_FACTOR_SCHUR_INVERTED: 283b3cb21ddSStefano Zampini sizesol = mumps->id.nrhs * mumps->id.size_schur; 28459ac8732SStefano Zampini if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 2859566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 2869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 28759ac8732SStefano Zampini mumps->schur_sizesol = sizesol; 288b5fa320bSStefano Zampini } 2899566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 2909566063dSJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 291a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2929566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, S->boundtocpu)); 293a3d589ffSStefano Zampini #endif 2949566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 29559ac8732SStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 2969566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 297b5fa320bSStefano Zampini } else { 2989566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 299b5fa320bSStefano Zampini } 3009566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(X)); 3019566063dSJacob Faibussowitsch PetscCall(MatProductSymbolic(X)); 3029566063dSJacob Faibussowitsch PetscCall(MatProductNumeric(X)); 3034417c5e8SHong Zhang 3049566063dSJacob Faibussowitsch PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 305b3cb21ddSStefano 
Zampini break; 306d71ae5a4SJacob Faibussowitsch default: 307d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 30859ac8732SStefano Zampini } 3099566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 3109566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 3119566063dSJacob Faibussowitsch PetscCall(MatDestroy(&X)); 3123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 313b5fa320bSStefano Zampini } 314b5fa320bSStefano Zampini 315d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 316d71ae5a4SJacob Faibussowitsch { 317b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 318b5fa320bSStefano Zampini 319b5fa320bSStefano Zampini PetscFunctionBegin; 320b5fa320bSStefano Zampini if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 3213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 322b5fa320bSStefano Zampini } 323b8f61ee1SStefano Zampini if (!expansion) { /* prepare for the condensation step */ 324b5fa320bSStefano Zampini PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 325b5fa320bSStefano Zampini /* allocate MUMPS internal array to store reduced right-hand sides */ 326b5fa320bSStefano Zampini if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 3279566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 328b5fa320bSStefano Zampini mumps->id.lredrhs = mumps->id.size_schur; 3299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 330b5fa320bSStefano Zampini mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 331b5fa320bSStefano Zampini } 332b5fa320bSStefano Zampini mumps->id.ICNTL(26) = 1; /* condensation phase */ 333b5fa320bSStefano Zampini } else { /* prepare for the expansion step */ 334b8f61ee1SStefano Zampini /* 
solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 3359566063dSJacob Faibussowitsch PetscCall(MatMumpsSolveSchur_Private(F)); 336b5fa320bSStefano Zampini mumps->id.ICNTL(26) = 2; /* expansion phase */ 3373ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 33808401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 339b5fa320bSStefano Zampini /* restore defaults */ 340b5fa320bSStefano Zampini mumps->id.ICNTL(26) = -1; 341d3d598ffSStefano Zampini /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 342d3d598ffSStefano Zampini if (mumps->id.nrhs > 1) { 3439566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 344d3d598ffSStefano Zampini mumps->id.lredrhs = 0; 345d3d598ffSStefano Zampini mumps->sizeredrhs = 0; 346d3d598ffSStefano Zampini } 347b5fa320bSStefano Zampini } 3483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 349b5fa320bSStefano Zampini } 350b5fa320bSStefano Zampini 351397b6df1SKris Buschelman /* 352d341cd04SHong Zhang MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 353d341cd04SHong Zhang 354397b6df1SKris Buschelman input: 35575480915SPierre Jolivet A - matrix in aij,baij or sbaij format 356397b6df1SKris Buschelman shift - 0: C style output triple; 1: Fortran style output triple. 
357bccb9932SShri Abhyankar reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 358bccb9932SShri Abhyankar MAT_REUSE_MATRIX: only the values in v array are updated 359397b6df1SKris Buschelman output: 360397b6df1SKris Buschelman nnz - dim of r, c, and v (number of local nonzero entries of A) 361397b6df1SKris Buschelman r, c, v - row and col index, matrix values (matrix triples) 362eb9baa12SBarry Smith 363eb9baa12SBarry Smith The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 3647ee00b23SStefano Zampini freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 365eb9baa12SBarry Smith that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 366eb9baa12SBarry Smith 367397b6df1SKris Buschelman */ 36816ebf90aSShri Abhyankar 369d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 370d71ae5a4SJacob Faibussowitsch { 371a3d589ffSStefano Zampini const PetscScalar *av; 372185f6596SHong Zhang const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 373a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j, k; 374a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 37516ebf90aSShri Abhyankar Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 376397b6df1SKris Buschelman 377397b6df1SKris Buschelman PetscFunctionBegin; 3789566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 379a6053eceSJunchao Zhang mumps->val = (PetscScalar *)av; 380bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 3812205254eSKarl Rupp nz = aa->nz; 3822205254eSKarl Rupp ai = aa->i; 3832205254eSKarl Rupp aj = aa->j; 3849566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 385a6053eceSJunchao Zhang for (i = k = 0; i < M; i++) { 38616ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 38767877ebaSShri Abhyankar ajj = aj + ai[i]; 38867877ebaSShri Abhyankar 
for (j = 0; j < rnz; j++) { 3899566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 3909566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 391a6053eceSJunchao Zhang k++; 39216ebf90aSShri Abhyankar } 39316ebf90aSShri Abhyankar } 394a6053eceSJunchao Zhang mumps->irn = row; 395a6053eceSJunchao Zhang mumps->jcn = col; 396a6053eceSJunchao Zhang mumps->nnz = nz; 39716ebf90aSShri Abhyankar } 3989566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 3993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 40016ebf90aSShri Abhyankar } 401397b6df1SKris Buschelman 402d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 403d71ae5a4SJacob Faibussowitsch { 404a6053eceSJunchao Zhang PetscInt64 nz, i, j, k, r; 4057ee00b23SStefano Zampini Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 406a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 4077ee00b23SStefano Zampini 4087ee00b23SStefano Zampini PetscFunctionBegin; 409a6053eceSJunchao Zhang mumps->val = a->val; 4107ee00b23SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 4117ee00b23SStefano Zampini nz = a->sliidx[a->totalslices]; 4129566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 413a6053eceSJunchao Zhang for (i = k = 0; i < a->totalslices; i++) { 41448a46eb9SPierre Jolivet for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 4157ee00b23SStefano Zampini } 4169566063dSJacob Faibussowitsch for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 417a6053eceSJunchao Zhang mumps->irn = row; 418a6053eceSJunchao Zhang mumps->jcn = col; 419a6053eceSJunchao Zhang mumps->nnz = nz; 4207ee00b23SStefano Zampini } 4213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4227ee00b23SStefano Zampini } 4237ee00b23SStefano Zampini 
424d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 425d71ae5a4SJacob Faibussowitsch { 42667877ebaSShri Abhyankar Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 42733d57670SJed Brown const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 428a6053eceSJunchao Zhang PetscInt64 M, nz, idx = 0, rnz, i, j, k, m; 429a6053eceSJunchao Zhang PetscInt bs; 430a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 43167877ebaSShri Abhyankar 43267877ebaSShri Abhyankar PetscFunctionBegin; 4339566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 43433d57670SJed Brown M = A->rmap->N / bs; 435a6053eceSJunchao Zhang mumps->val = aa->a; 436bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 4379371c9d4SSatish Balay ai = aa->i; 4389371c9d4SSatish Balay aj = aa->j; 43967877ebaSShri Abhyankar nz = bs2 * aa->nz; 4409566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 44167877ebaSShri Abhyankar for (i = 0; i < M; i++) { 44267877ebaSShri Abhyankar ajj = aj + ai[i]; 44367877ebaSShri Abhyankar rnz = ai[i + 1] - ai[i]; 44467877ebaSShri Abhyankar for (k = 0; k < rnz; k++) { 44567877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 44667877ebaSShri Abhyankar for (m = 0; m < bs; m++) { 4479566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 4489566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 449a6053eceSJunchao Zhang idx++; 45067877ebaSShri Abhyankar } 45167877ebaSShri Abhyankar } 45267877ebaSShri Abhyankar } 45367877ebaSShri Abhyankar } 454a6053eceSJunchao Zhang mumps->irn = row; 455a6053eceSJunchao Zhang mumps->jcn = col; 456a6053eceSJunchao Zhang mumps->nnz = nz; 45767877ebaSShri Abhyankar } 4583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 45967877ebaSShri Abhyankar } 46067877ebaSShri Abhyankar 461d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, 
PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 462d71ae5a4SJacob Faibussowitsch { 46375480915SPierre Jolivet const PetscInt *ai, *aj, *ajj; 464a6053eceSJunchao Zhang PetscInt bs; 465a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j, k, m; 466a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 46775480915SPierre Jolivet PetscScalar *val; 46816ebf90aSShri Abhyankar Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 46975480915SPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 47038548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 471b94d7dedSBarry Smith PetscBool isset, hermitian; 47238548759SBarry Smith #endif 47316ebf90aSShri Abhyankar 47416ebf90aSShri Abhyankar PetscFunctionBegin; 47538548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 476b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 477b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 47838548759SBarry Smith #endif 4792205254eSKarl Rupp ai = aa->i; 4802205254eSKarl Rupp aj = aa->j; 4819566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 48275480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 483f3fa974cSJacob Faibussowitsch const PetscInt64 alloc_size = aa->nz * bs2; 484f3fa974cSJacob Faibussowitsch 485f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 486a6053eceSJunchao Zhang if (bs > 1) { 487f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 488a6053eceSJunchao Zhang mumps->val = mumps->val_alloc; 48975480915SPierre Jolivet } else { 490a6053eceSJunchao Zhang mumps->val = aa->a; 49175480915SPierre Jolivet } 492a6053eceSJunchao Zhang mumps->irn = row; 493a6053eceSJunchao Zhang mumps->jcn = col; 494a6053eceSJunchao Zhang } else { 495a6053eceSJunchao Zhang if (bs == 1) mumps->val = aa->a; 496a6053eceSJunchao Zhang row = mumps->irn; 497a6053eceSJunchao Zhang col = 
mumps->jcn; 498a6053eceSJunchao Zhang } 499a6053eceSJunchao Zhang val = mumps->val; 500185f6596SHong Zhang 50116ebf90aSShri Abhyankar nz = 0; 502a81fe166SPierre Jolivet if (bs > 1) { 50375480915SPierre Jolivet for (i = 0; i < mbs; i++) { 50416ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 50567877ebaSShri Abhyankar ajj = aj + ai[i]; 50675480915SPierre Jolivet for (j = 0; j < rnz; j++) { 50775480915SPierre Jolivet for (k = 0; k < bs; k++) { 50875480915SPierre Jolivet for (m = 0; m < bs; m++) { 509ec4f40fdSPierre Jolivet if (ajj[j] > i || k >= m) { 51075480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 5119566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 5129566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 51375480915SPierre Jolivet } 51475480915SPierre Jolivet val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 51575480915SPierre Jolivet } 51675480915SPierre Jolivet } 51775480915SPierre Jolivet } 51875480915SPierre Jolivet } 51975480915SPierre Jolivet } 520a81fe166SPierre Jolivet } else if (reuse == MAT_INITIAL_MATRIX) { 521a81fe166SPierre Jolivet for (i = 0; i < mbs; i++) { 522a81fe166SPierre Jolivet rnz = ai[i + 1] - ai[i]; 523a81fe166SPierre Jolivet ajj = aj + ai[i]; 524a81fe166SPierre Jolivet for (j = 0; j < rnz; j++) { 5259566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 5269566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 527a6053eceSJunchao Zhang nz++; 528a81fe166SPierre Jolivet } 529a81fe166SPierre Jolivet } 53008401ef6SPierre Jolivet PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 53175480915SPierre Jolivet } 532a6053eceSJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 5333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 53416ebf90aSShri Abhyankar } 53516ebf90aSShri Abhyankar 
536d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 537d71ae5a4SJacob Faibussowitsch { 53867877ebaSShri Abhyankar const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 539a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j; 54067877ebaSShri Abhyankar const PetscScalar *av, *v1; 54116ebf90aSShri Abhyankar PetscScalar *val; 542a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 543829b1710SHong Zhang Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 54429b521d4Sstefano_zampini PetscBool missing; 54538548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 546b94d7dedSBarry Smith PetscBool hermitian, isset; 54738548759SBarry Smith #endif 54816ebf90aSShri Abhyankar 54916ebf90aSShri Abhyankar PetscFunctionBegin; 55038548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 551b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 552b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 55338548759SBarry Smith #endif 5549566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 5559371c9d4SSatish Balay ai = aa->i; 5569371c9d4SSatish Balay aj = aa->j; 55716ebf90aSShri Abhyankar adiag = aa->diag; 5589566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 559bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 5607ee00b23SStefano Zampini /* count nz in the upper triangular part of A */ 561829b1710SHong Zhang nz = 0; 56229b521d4Sstefano_zampini if (missing) { 56329b521d4Sstefano_zampini for (i = 0; i < M; i++) { 56429b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 56529b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 56629b521d4Sstefano_zampini if (aj[j] < i) continue; 56729b521d4Sstefano_zampini nz++; 56829b521d4Sstefano_zampini } 56929b521d4Sstefano_zampini } else { 57029b521d4Sstefano_zampini nz += 
ai[i + 1] - adiag[i]; 57129b521d4Sstefano_zampini } 57229b521d4Sstefano_zampini } 57329b521d4Sstefano_zampini } else { 574829b1710SHong Zhang for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 57529b521d4Sstefano_zampini } 5769566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 5779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 578a6053eceSJunchao Zhang mumps->nnz = nz; 579a6053eceSJunchao Zhang mumps->irn = row; 580a6053eceSJunchao Zhang mumps->jcn = col; 581a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 582185f6596SHong Zhang 58316ebf90aSShri Abhyankar nz = 0; 58429b521d4Sstefano_zampini if (missing) { 58529b521d4Sstefano_zampini for (i = 0; i < M; i++) { 58629b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 58729b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 58829b521d4Sstefano_zampini if (aj[j] < i) continue; 5899566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 5909566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 59129b521d4Sstefano_zampini val[nz] = av[j]; 59229b521d4Sstefano_zampini nz++; 59329b521d4Sstefano_zampini } 59429b521d4Sstefano_zampini } else { 59529b521d4Sstefano_zampini rnz = ai[i + 1] - adiag[i]; 59629b521d4Sstefano_zampini ajj = aj + adiag[i]; 59729b521d4Sstefano_zampini v1 = av + adiag[i]; 59829b521d4Sstefano_zampini for (j = 0; j < rnz; j++) { 5999566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 6009566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 601a6053eceSJunchao Zhang val[nz++] = v1[j]; 60229b521d4Sstefano_zampini } 60329b521d4Sstefano_zampini } 60429b521d4Sstefano_zampini } 60529b521d4Sstefano_zampini } else { 60616ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 60716ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 60867877ebaSShri Abhyankar ajj = aj + adiag[i]; 609cf3759fdSShri Abhyankar v1 = av + adiag[i]; 
61067877ebaSShri Abhyankar for (j = 0; j < rnz; j++) { 6119566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 6129566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 613a6053eceSJunchao Zhang val[nz++] = v1[j]; 61416ebf90aSShri Abhyankar } 61516ebf90aSShri Abhyankar } 61629b521d4Sstefano_zampini } 617397b6df1SKris Buschelman } else { 618a6053eceSJunchao Zhang nz = 0; 619a6053eceSJunchao Zhang val = mumps->val; 62029b521d4Sstefano_zampini if (missing) { 62116ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 62229b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 62329b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 62429b521d4Sstefano_zampini if (aj[j] < i) continue; 62529b521d4Sstefano_zampini val[nz++] = av[j]; 62629b521d4Sstefano_zampini } 62729b521d4Sstefano_zampini } else { 62816ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 62967877ebaSShri Abhyankar v1 = av + adiag[i]; 630ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 63116ebf90aSShri Abhyankar } 63216ebf90aSShri Abhyankar } 63329b521d4Sstefano_zampini } else { 63416ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 63516ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 63616ebf90aSShri Abhyankar v1 = av + adiag[i]; 637ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 63816ebf90aSShri Abhyankar } 63916ebf90aSShri Abhyankar } 64029b521d4Sstefano_zampini } 6419566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 6423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 64316ebf90aSShri Abhyankar } 64416ebf90aSShri Abhyankar 645d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 646d71ae5a4SJacob Faibussowitsch { 647a6053eceSJunchao Zhang const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 648a6053eceSJunchao Zhang PetscInt bs; 649a6053eceSJunchao Zhang 
PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 650a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 65116ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 65216ebf90aSShri Abhyankar PetscScalar *val; 653397b6df1SKris Buschelman Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 654397b6df1SKris Buschelman Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 655397b6df1SKris Buschelman Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 656ec4f40fdSPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 65738548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 658b94d7dedSBarry Smith PetscBool hermitian, isset; 65938548759SBarry Smith #endif 66016ebf90aSShri Abhyankar 66116ebf90aSShri Abhyankar PetscFunctionBegin; 66238548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 663b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 664b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 66538548759SBarry Smith #endif 6669566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 66738548759SBarry Smith rstart = A->rmap->rstart; 66838548759SBarry Smith ai = aa->i; 66938548759SBarry Smith aj = aa->j; 67038548759SBarry Smith bi = bb->i; 67138548759SBarry Smith bj = bb->j; 67238548759SBarry Smith av = aa->a; 67338548759SBarry Smith bv = bb->a; 674397b6df1SKris Buschelman 6752205254eSKarl Rupp garray = mat->garray; 6762205254eSKarl Rupp 677bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 678a6053eceSJunchao Zhang nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 6799566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 6809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 681a6053eceSJunchao Zhang /* can not decide the exact mumps->nnz now because of the SBAIJ */ 682a6053eceSJunchao Zhang mumps->irn = row; 683a6053eceSJunchao Zhang mumps->jcn = col; 
684a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 685397b6df1SKris Buschelman } else { 686a6053eceSJunchao Zhang val = mumps->val; 687397b6df1SKris Buschelman } 688397b6df1SKris Buschelman 6899371c9d4SSatish Balay jj = 0; 6909371c9d4SSatish Balay irow = rstart; 691ec4f40fdSPierre Jolivet for (i = 0; i < mbs; i++) { 692397b6df1SKris Buschelman ajj = aj + ai[i]; /* ptr to the beginning of this row */ 693397b6df1SKris Buschelman countA = ai[i + 1] - ai[i]; 694397b6df1SKris Buschelman countB = bi[i + 1] - bi[i]; 695397b6df1SKris Buschelman bjj = bj + bi[i]; 696ec4f40fdSPierre Jolivet v1 = av + ai[i] * bs2; 697ec4f40fdSPierre Jolivet v2 = bv + bi[i] * bs2; 698397b6df1SKris Buschelman 699ec4f40fdSPierre Jolivet if (bs > 1) { 700ec4f40fdSPierre Jolivet /* A-part */ 701ec4f40fdSPierre Jolivet for (j = 0; j < countA; j++) { 702ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 703ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 704ec4f40fdSPierre Jolivet if (rstart + ajj[j] * bs > irow || k >= m) { 705ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 7069566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 7079566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 708ec4f40fdSPierre Jolivet } 709ec4f40fdSPierre Jolivet val[jj++] = v1[j * bs2 + m + k * bs]; 710ec4f40fdSPierre Jolivet } 711ec4f40fdSPierre Jolivet } 712ec4f40fdSPierre Jolivet } 713ec4f40fdSPierre Jolivet } 714ec4f40fdSPierre Jolivet 715ec4f40fdSPierre Jolivet /* B-part */ 716ec4f40fdSPierre Jolivet for (j = 0; j < countB; j++) { 717ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 718ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 719ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 7209566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 7219566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 
722ec4f40fdSPierre Jolivet } 723ec4f40fdSPierre Jolivet val[jj++] = v2[j * bs2 + m + k * bs]; 724ec4f40fdSPierre Jolivet } 725ec4f40fdSPierre Jolivet } 726ec4f40fdSPierre Jolivet } 727ec4f40fdSPierre Jolivet } else { 728397b6df1SKris Buschelman /* A-part */ 729397b6df1SKris Buschelman for (j = 0; j < countA; j++) { 730bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7319566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 7329566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 733397b6df1SKris Buschelman } 73416ebf90aSShri Abhyankar val[jj++] = v1[j]; 735397b6df1SKris Buschelman } 73616ebf90aSShri Abhyankar 73716ebf90aSShri Abhyankar /* B-part */ 73816ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 739bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7409566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 7419566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 742397b6df1SKris Buschelman } 74316ebf90aSShri Abhyankar val[jj++] = v2[j]; 74416ebf90aSShri Abhyankar } 74516ebf90aSShri Abhyankar } 746ec4f40fdSPierre Jolivet irow += bs; 747ec4f40fdSPierre Jolivet } 7485d955bbbSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 7493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 75016ebf90aSShri Abhyankar } 75116ebf90aSShri Abhyankar 752d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 753d71ae5a4SJacob Faibussowitsch { 75416ebf90aSShri Abhyankar const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 7555d955bbbSStefano Zampini PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 756a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 75716ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 75816ebf90aSShri Abhyankar PetscScalar *val; 759a3d589ffSStefano Zampini 
Mat Ad, Ao; 760a3d589ffSStefano Zampini Mat_SeqAIJ *aa; 761a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 76216ebf90aSShri Abhyankar 76316ebf90aSShri Abhyankar PetscFunctionBegin; 7649566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 7659566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 7669566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 767a3d589ffSStefano Zampini 768a3d589ffSStefano Zampini aa = (Mat_SeqAIJ *)(Ad)->data; 769a3d589ffSStefano Zampini bb = (Mat_SeqAIJ *)(Ao)->data; 77038548759SBarry Smith ai = aa->i; 77138548759SBarry Smith aj = aa->j; 77238548759SBarry Smith bi = bb->i; 77338548759SBarry Smith bj = bb->j; 77416ebf90aSShri Abhyankar 775a3d589ffSStefano Zampini rstart = A->rmap->rstart; 7765d955bbbSStefano Zampini cstart = A->cmap->rstart; 7772205254eSKarl Rupp 778bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 779a6053eceSJunchao Zhang nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 7809566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 7819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 782a6053eceSJunchao Zhang mumps->nnz = nz; 783a6053eceSJunchao Zhang mumps->irn = row; 784a6053eceSJunchao Zhang mumps->jcn = col; 785a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 78616ebf90aSShri Abhyankar } else { 787a6053eceSJunchao Zhang val = mumps->val; 78816ebf90aSShri Abhyankar } 78916ebf90aSShri Abhyankar 7909371c9d4SSatish Balay jj = 0; 7919371c9d4SSatish Balay irow = rstart; 79216ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 79316ebf90aSShri Abhyankar ajj = aj + ai[i]; /* ptr to the beginning of this row */ 79416ebf90aSShri Abhyankar countA = ai[i + 1] - ai[i]; 79516ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 79616ebf90aSShri Abhyankar bjj = bj + bi[i]; 79716ebf90aSShri Abhyankar v1 = av + ai[i]; 79816ebf90aSShri Abhyankar v2 = bv + bi[i]; 79916ebf90aSShri Abhyankar 
80016ebf90aSShri Abhyankar /* A-part */ 80116ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 802bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8039566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 8045d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 80516ebf90aSShri Abhyankar } 80616ebf90aSShri Abhyankar val[jj++] = v1[j]; 80716ebf90aSShri Abhyankar } 80816ebf90aSShri Abhyankar 80916ebf90aSShri Abhyankar /* B-part */ 81016ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 811bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8129566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 8139566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 81416ebf90aSShri Abhyankar } 81516ebf90aSShri Abhyankar val[jj++] = v2[j]; 81616ebf90aSShri Abhyankar } 81716ebf90aSShri Abhyankar irow++; 81816ebf90aSShri Abhyankar } 8199566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 8209566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 8213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 82216ebf90aSShri Abhyankar } 82316ebf90aSShri Abhyankar 824d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 825d71ae5a4SJacob Faibussowitsch { 82667877ebaSShri Abhyankar Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 82767877ebaSShri Abhyankar Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 82867877ebaSShri Abhyankar Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 82967877ebaSShri Abhyankar const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 8305d955bbbSStefano Zampini const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 83133d57670SJed Brown const PetscInt bs2 = mat->bs2; 832a6053eceSJunchao Zhang PetscInt bs; 
833a6053eceSJunchao Zhang PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 834a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 83567877ebaSShri Abhyankar const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 83667877ebaSShri Abhyankar PetscScalar *val; 83767877ebaSShri Abhyankar 83867877ebaSShri Abhyankar PetscFunctionBegin; 8399566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 840bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 84167877ebaSShri Abhyankar nz = bs2 * (aa->nz + bb->nz); 8429566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 8439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 844a6053eceSJunchao Zhang mumps->nnz = nz; 845a6053eceSJunchao Zhang mumps->irn = row; 846a6053eceSJunchao Zhang mumps->jcn = col; 847a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 84867877ebaSShri Abhyankar } else { 849a6053eceSJunchao Zhang val = mumps->val; 85067877ebaSShri Abhyankar } 85167877ebaSShri Abhyankar 8529371c9d4SSatish Balay jj = 0; 8539371c9d4SSatish Balay irow = rstart; 85467877ebaSShri Abhyankar for (i = 0; i < mbs; i++) { 85567877ebaSShri Abhyankar countA = ai[i + 1] - ai[i]; 85667877ebaSShri Abhyankar countB = bi[i + 1] - bi[i]; 85767877ebaSShri Abhyankar ajj = aj + ai[i]; 85867877ebaSShri Abhyankar bjj = bj + bi[i]; 85967877ebaSShri Abhyankar v1 = av + bs2 * ai[i]; 86067877ebaSShri Abhyankar v2 = bv + bs2 * bi[i]; 86167877ebaSShri Abhyankar 86267877ebaSShri Abhyankar idx = 0; 86367877ebaSShri Abhyankar /* A-part */ 86467877ebaSShri Abhyankar for (k = 0; k < countA; k++) { 86567877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 86667877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 867bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8689566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 8695d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 87067877ebaSShri Abhyankar } 87167877ebaSShri 
Abhyankar val[jj++] = v1[idx++]; 87267877ebaSShri Abhyankar } 87367877ebaSShri Abhyankar } 87467877ebaSShri Abhyankar } 87567877ebaSShri Abhyankar 87667877ebaSShri Abhyankar idx = 0; 87767877ebaSShri Abhyankar /* B-part */ 87867877ebaSShri Abhyankar for (k = 0; k < countB; k++) { 87967877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 88067877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 881bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8829566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 8839566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 88467877ebaSShri Abhyankar } 885d985c460SShri Abhyankar val[jj++] = v2[idx++]; 88667877ebaSShri Abhyankar } 88767877ebaSShri Abhyankar } 88867877ebaSShri Abhyankar } 889d985c460SShri Abhyankar irow += bs; 89067877ebaSShri Abhyankar } 8913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 89267877ebaSShri Abhyankar } 89367877ebaSShri Abhyankar 894d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 895d71ae5a4SJacob Faibussowitsch { 89616ebf90aSShri Abhyankar const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 897a6053eceSJunchao Zhang PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 898a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 89916ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 90016ebf90aSShri Abhyankar PetscScalar *val; 901a3d589ffSStefano Zampini Mat Ad, Ao; 902a3d589ffSStefano Zampini Mat_SeqAIJ *aa; 903a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 90438548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 905b94d7dedSBarry Smith PetscBool hermitian, isset; 90638548759SBarry Smith #endif 90716ebf90aSShri Abhyankar 90816ebf90aSShri Abhyankar PetscFunctionBegin; 90938548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 910b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, 
&isset, &hermitian)); 911b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 91238548759SBarry Smith #endif 9139566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 9149566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 9159566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 916a3d589ffSStefano Zampini 917a3d589ffSStefano Zampini aa = (Mat_SeqAIJ *)(Ad)->data; 918a3d589ffSStefano Zampini bb = (Mat_SeqAIJ *)(Ao)->data; 91938548759SBarry Smith ai = aa->i; 92038548759SBarry Smith aj = aa->j; 92138548759SBarry Smith adiag = aa->diag; 92238548759SBarry Smith bi = bb->i; 92338548759SBarry Smith bj = bb->j; 9242205254eSKarl Rupp 92516ebf90aSShri Abhyankar rstart = A->rmap->rstart; 92616ebf90aSShri Abhyankar 927bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 928e0bace9bSHong Zhang nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 929e0bace9bSHong Zhang nzb = 0; /* num of upper triangular entries in mat->B */ 93016ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 931e0bace9bSHong Zhang nza += (ai[i + 1] - adiag[i]); 93216ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 93316ebf90aSShri Abhyankar bjj = bj + bi[i]; 934e0bace9bSHong Zhang for (j = 0; j < countB; j++) { 935e0bace9bSHong Zhang if (garray[bjj[j]] > rstart) nzb++; 936e0bace9bSHong Zhang } 937e0bace9bSHong Zhang } 93816ebf90aSShri Abhyankar 939e0bace9bSHong Zhang nz = nza + nzb; /* total nz of upper triangular part of mat */ 9409566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 9419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 942a6053eceSJunchao Zhang mumps->nnz = nz; 943a6053eceSJunchao Zhang mumps->irn = row; 944a6053eceSJunchao Zhang mumps->jcn = col; 945a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 94616ebf90aSShri Abhyankar } else 
{ 947a6053eceSJunchao Zhang val = mumps->val; 94816ebf90aSShri Abhyankar } 94916ebf90aSShri Abhyankar 9509371c9d4SSatish Balay jj = 0; 9519371c9d4SSatish Balay irow = rstart; 95216ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 95316ebf90aSShri Abhyankar ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 95416ebf90aSShri Abhyankar v1 = av + adiag[i]; 95516ebf90aSShri Abhyankar countA = ai[i + 1] - adiag[i]; 95616ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 95716ebf90aSShri Abhyankar bjj = bj + bi[i]; 95816ebf90aSShri Abhyankar v2 = bv + bi[i]; 95916ebf90aSShri Abhyankar 96016ebf90aSShri Abhyankar /* A-part */ 96116ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 962bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 9639566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 9649566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 96516ebf90aSShri Abhyankar } 96616ebf90aSShri Abhyankar val[jj++] = v1[j]; 96716ebf90aSShri Abhyankar } 96816ebf90aSShri Abhyankar 96916ebf90aSShri Abhyankar /* B-part */ 97016ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 97116ebf90aSShri Abhyankar if (garray[bjj[j]] > rstart) { 972bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 9739566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 9749566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 97516ebf90aSShri Abhyankar } 97616ebf90aSShri Abhyankar val[jj++] = v2[j]; 97716ebf90aSShri Abhyankar } 978397b6df1SKris Buschelman } 979397b6df1SKris Buschelman irow++; 980397b6df1SKris Buschelman } 9819566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 9829566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 9833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 984397b6df1SKris Buschelman } 985397b6df1SKris Buschelman 9869d0448ceSStefano Zampini 
PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 9879d0448ceSStefano Zampini { 9889d0448ceSStefano Zampini Mat **mats; 9899d0448ceSStefano Zampini PetscInt nr, nc; 9909d0448ceSStefano Zampini PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 9919d0448ceSStefano Zampini 9929d0448ceSStefano Zampini PetscFunctionBegin; 9939d0448ceSStefano Zampini PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 9949d0448ceSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 9959d0448ceSStefano Zampini PetscMUMPSInt *irns, *jcns; 9969d0448ceSStefano Zampini PetscScalar *vals; 9979d0448ceSStefano Zampini PetscInt64 totnnz, cumnnz, maxnnz; 9989d0448ceSStefano Zampini PetscInt *pjcns_w; 9999d0448ceSStefano Zampini IS *rows, *cols; 10009d0448ceSStefano Zampini PetscInt **rows_idx, **cols_idx; 10019d0448ceSStefano Zampini 10029d0448ceSStefano Zampini cumnnz = 0; 10039d0448ceSStefano Zampini maxnnz = 0; 10045d955bbbSStefano Zampini PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 10059d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 10069d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 10079d0448ceSStefano Zampini Mat sub = mats[r][c]; 10089d0448ceSStefano Zampini 10099d0448ceSStefano Zampini mumps->nest_convert_to_triples[r * nc + c] = NULL; 10109d0448ceSStefano Zampini if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 10119d0448ceSStefano Zampini if (sub) { 10129d0448ceSStefano Zampini PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 10135d955bbbSStefano Zampini PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE; 10149d0448ceSStefano Zampini MatInfo info; 10159d0448ceSStefano Zampini 10165d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 10175d955bbbSStefano Zampini if (isTrans) 
PetscCall(MatTransposeGetMat(sub, &sub)); 10185d955bbbSStefano Zampini else { 10195d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 10205d955bbbSStefano Zampini if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 10215d955bbbSStefano Zampini } 10229d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 10239d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 10249d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 10259d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 10269d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 10279d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 10289d0448ceSStefano Zampini 10299d0448ceSStefano Zampini if (chol) { 10309d0448ceSStefano Zampini if (r == c) { 10319d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 10329d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 10339d0448ceSStefano Zampini else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 10349d0448ceSStefano Zampini else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 10359d0448ceSStefano Zampini } else { 10369d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 10379d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 10389d0448ceSStefano Zampini else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 10399d0448ceSStefano Zampini else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 10409d0448ceSStefano 
Zampini } 10419d0448ceSStefano Zampini } else { 10429d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 10439d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 10449d0448ceSStefano Zampini else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 10459d0448ceSStefano Zampini else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 10469d0448ceSStefano Zampini } 10479d0448ceSStefano Zampini PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 10489d0448ceSStefano Zampini mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 10499d0448ceSStefano Zampini PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 10509d0448ceSStefano Zampini cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 10519d0448ceSStefano Zampini maxnnz = PetscMax(maxnnz, info.nz_used); 10529d0448ceSStefano Zampini } 10539d0448ceSStefano Zampini } 10549d0448ceSStefano Zampini } 10559d0448ceSStefano Zampini 10569d0448ceSStefano Zampini /* Allocate total COO */ 10579d0448ceSStefano Zampini totnnz = cumnnz; 10589d0448ceSStefano Zampini PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 10599d0448ceSStefano Zampini PetscCall(PetscMalloc1(totnnz, &vals)); 10609d0448ceSStefano Zampini 10619d0448ceSStefano Zampini /* Handle rows and column maps 10629d0448ceSStefano Zampini We directly map rows and use an SF for the columns */ 10639d0448ceSStefano Zampini PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 10649d0448ceSStefano Zampini PetscCall(MatNestGetISs(A, rows, cols)); 10659d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 10669d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 
10679d0448ceSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 10685d955bbbSStefano Zampini else (void)maxnnz; 10699d0448ceSStefano Zampini 10709d0448ceSStefano Zampini cumnnz = 0; 10719d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 10729d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 10739d0448ceSStefano Zampini Mat sub = mats[r][c]; 10749d0448ceSStefano Zampini const PetscInt *ridx = rows_idx[r]; 10755d955bbbSStefano Zampini const PetscInt *cidx = cols_idx[c]; 10769d0448ceSStefano Zampini PetscInt rst; 10779d0448ceSStefano Zampini PetscSF csf; 10785d955bbbSStefano Zampini PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 10795d955bbbSStefano Zampini PetscLayout cmap; 10809d0448ceSStefano Zampini 10819d0448ceSStefano Zampini mumps->nest_vals_start[r * nc + c] = cumnnz; 10829d0448ceSStefano Zampini if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 10839d0448ceSStefano Zampini 10845d955bbbSStefano Zampini /* Extract inner blocks if needed */ 10855d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 10865d955bbbSStefano Zampini if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 10875d955bbbSStefano Zampini else { 10885d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 10895d955bbbSStefano Zampini if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 10905d955bbbSStefano Zampini } 10915d955bbbSStefano Zampini swap = (PetscBool)(isTrans || isHTrans); 10925d955bbbSStefano Zampini 10935d955bbbSStefano Zampini /* Get column layout to map off-process columns */ 10945d955bbbSStefano Zampini PetscCall(MatGetLayouts(sub, NULL, &cmap)); 10955d955bbbSStefano Zampini 10965d955bbbSStefano Zampini /* Get row start to map on-process rows */ 10975d955bbbSStefano Zampini PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 10985d955bbbSStefano Zampini 10999d0448ceSStefano 
Zampini /* Directly use the mumps datastructure and use C ordering for now */ 11009d0448ceSStefano Zampini PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 11019d0448ceSStefano Zampini 11025d955bbbSStefano Zampini /* Swap the role of rows and columns indices for transposed blocks 11035d955bbbSStefano Zampini since we need values with global final ordering */ 11045d955bbbSStefano Zampini if (swap) { 11055d955bbbSStefano Zampini cidx = rows_idx[r]; 11065d955bbbSStefano Zampini ridx = cols_idx[c]; 11079d0448ceSStefano Zampini } 11089d0448ceSStefano Zampini 11095d955bbbSStefano Zampini /* Communicate column indices 11105d955bbbSStefano Zampini This could have been done with a single SF but it would have complicated the code a lot. 11115d955bbbSStefano Zampini But since we do it only once, we pay the price of setting up an SF for each block */ 11125d955bbbSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) { 11135d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 11145d955bbbSStefano Zampini } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 11159d0448ceSStefano Zampini PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 11165d955bbbSStefano Zampini PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 11175d955bbbSStefano Zampini PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 11185d955bbbSStefano Zampini PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 11199d0448ceSStefano Zampini PetscCall(PetscSFDestroy(&csf)); 11209d0448ceSStefano Zampini 11215d955bbbSStefano Zampini /* Import indices: use direct map for rows and mapped indices for columns */ 11225d955bbbSStefano Zampini if (swap) { 11235d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) { 11245d955bbbSStefano Zampini 
PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 11255d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 11265d955bbbSStefano Zampini } 11275d955bbbSStefano Zampini } else { 11285d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) { 11295d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 11305d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 11315d955bbbSStefano Zampini } 11325d955bbbSStefano Zampini } 11335d955bbbSStefano Zampini 11345d955bbbSStefano Zampini /* Import values to full COO */ 11355d955bbbSStefano Zampini if (isHTrans) { /* conjugate the entries */ 11365d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 11375d955bbbSStefano Zampini } 11385d955bbbSStefano Zampini PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 11399d0448ceSStefano Zampini 11409d0448ceSStefano Zampini /* Shift new starting point and sanity check */ 11419d0448ceSStefano Zampini cumnnz += mumps->nnz; 11429d0448ceSStefano Zampini PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 11439d0448ceSStefano Zampini 11449d0448ceSStefano Zampini /* Free scratch memory */ 11459d0448ceSStefano Zampini PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 11469d0448ceSStefano Zampini PetscCall(PetscFree(mumps->val_alloc)); 11479d0448ceSStefano Zampini mumps->val = NULL; 11489d0448ceSStefano Zampini mumps->nnz = 0; 11499d0448ceSStefano Zampini } 11509d0448ceSStefano Zampini } 11519d0448ceSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 11529d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 11539d0448ceSStefano Zampini for (PetscInt c = 
0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 11549d0448ceSStefano Zampini PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 11559d0448ceSStefano Zampini if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 11565d955bbbSStefano Zampini mumps->nest_vals_start[nr * nc] = cumnnz; 11579d0448ceSStefano Zampini 11589d0448ceSStefano Zampini /* Set pointers for final MUMPS data structure */ 11599d0448ceSStefano Zampini mumps->nest_vals = vals; 11609d0448ceSStefano Zampini mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 11619d0448ceSStefano Zampini mumps->val = vals; 11629d0448ceSStefano Zampini mumps->irn = irns; 11639d0448ceSStefano Zampini mumps->jcn = jcns; 11649d0448ceSStefano Zampini mumps->nnz = cumnnz; 11659d0448ceSStefano Zampini } else { 11669d0448ceSStefano Zampini PetscScalar *oval = mumps->nest_vals; 11679d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 11689d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 11695d955bbbSStefano Zampini PetscBool isTrans, isHTrans = PETSC_FALSE; 11705d955bbbSStefano Zampini Mat sub = mats[r][c]; 11715d955bbbSStefano Zampini PetscInt midx = r * nc + c; 11725d955bbbSStefano Zampini 11735d955bbbSStefano Zampini if (!mumps->nest_convert_to_triples[midx]) continue; 11745d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 11755d955bbbSStefano Zampini if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 11765d955bbbSStefano Zampini else { 11775d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 11785d955bbbSStefano Zampini if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 11795d955bbbSStefano Zampini } 11805d955bbbSStefano Zampini mumps->val = oval + 
mumps->nest_vals_start[midx]; 11815d955bbbSStefano Zampini PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 11825d955bbbSStefano Zampini if (isHTrans) { 11835d955bbbSStefano Zampini PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 11845d955bbbSStefano Zampini for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 11855d955bbbSStefano Zampini } 11869d0448ceSStefano Zampini } 11879d0448ceSStefano Zampini } 11889d0448ceSStefano Zampini mumps->val = oval; 11899d0448ceSStefano Zampini } 11909d0448ceSStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 11919d0448ceSStefano Zampini } 11929d0448ceSStefano Zampini 1193d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_MUMPS(Mat A) 1194d71ae5a4SJacob Faibussowitsch { 1195a6053eceSJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1196b24902e0SBarry Smith 1197397b6df1SKris Buschelman PetscFunctionBegin; 11989566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 11999566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 12009566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 12019566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->b_seq)); 12029566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 12039566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.perm_in)); 12049566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 12059566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->val_alloc)); 12069566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->info)); 1207413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 1208413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 12099566063dSJacob Faibussowitsch PetscCall(MatMumpsResetSchur_Private(mumps)); 1210413bcc21SPierre Jolivet if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called 
for this instance */ 1211a5e57a09SHong Zhang mumps->id.job = JOB_END; 12123ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 121308401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1214413bcc21SPierre Jolivet if (mumps->mumps_comm != MPI_COMM_NULL) { 1215413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1216413bcc21SPierre Jolivet else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1217413bcc21SPierre Jolivet } 1218413bcc21SPierre Jolivet } 12193ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 122067602552SJunchao Zhang if (mumps->use_petsc_omp_support) { 12219566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 12229566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 12239566063dSJacob Faibussowitsch PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 122467602552SJunchao Zhang } 12253ab56b82SJunchao Zhang #endif 12269566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 12279566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 12289566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->recvcount)); 12299566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->reqs)); 12309566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->irhs_loc)); 12319d0448ceSStefano Zampini PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 12329d0448ceSStefano Zampini PetscCall(PetscFree(mumps->nest_vals)); 12339566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1234bf0cc555SLisandro Dalcin 123597969023SHong Zhang /* clear composed functions */ 12369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 
12379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 12389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 12399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 12409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 12419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 12429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 12439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 12449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 12459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 12469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 12475c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 12489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 12499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 12503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1251397b6df1SKris Buschelman } 1252397b6df1SKris Buschelman 125367602552SJunchao Zhang /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
*/ 1254d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1255d71ae5a4SJacob Faibussowitsch { 125667602552SJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 125767602552SJunchao Zhang const PetscMPIInt ompsize = mumps->omp_comm_size; 125867602552SJunchao Zhang PetscInt i, m, M, rstart; 125967602552SJunchao Zhang 126067602552SJunchao Zhang PetscFunctionBegin; 12619566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &M, NULL)); 12629566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &m, NULL)); 126308401ef6SPierre Jolivet PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 126467602552SJunchao Zhang if (ompsize == 1) { 126567602552SJunchao Zhang if (!mumps->irhs_loc) { 126667602552SJunchao Zhang mumps->nloc_rhs = m; 12679566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 12689566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 126967602552SJunchao Zhang for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 127067602552SJunchao Zhang } 127167602552SJunchao Zhang mumps->id.rhs_loc = (MumpsScalar *)array; 127267602552SJunchao Zhang } else { 127367602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 127467602552SJunchao Zhang const PetscInt *ranges; 127567602552SJunchao Zhang PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 127667602552SJunchao Zhang MPI_Group petsc_group, omp_group; 127767602552SJunchao Zhang PetscScalar *recvbuf = NULL; 127867602552SJunchao Zhang 127967602552SJunchao Zhang if (mumps->is_omp_master) { 128067602552SJunchao Zhang /* Lazily initialize the omp stuff for distributed rhs */ 128167602552SJunchao Zhang if (!mumps->irhs_loc) { 12829566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 12839566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(ompsize, 
&mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 12849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 12859566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 128667602552SJunchao Zhang for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 12879566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 128867602552SJunchao Zhang 128967602552SJunchao Zhang /* Populate mumps->irhs_loc[], rhs_nrow[] */ 129067602552SJunchao Zhang mumps->nloc_rhs = 0; 12919566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(A, &ranges)); 129267602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 129367602552SJunchao Zhang mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 129467602552SJunchao Zhang mumps->nloc_rhs += mumps->rhs_nrow[j]; 129567602552SJunchao Zhang } 12969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 129767602552SJunchao Zhang for (j = k = 0; j < ompsize; j++) { 129867602552SJunchao Zhang for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 129967602552SJunchao Zhang } 130067602552SJunchao Zhang 13019566063dSJacob Faibussowitsch PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 13029566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&petsc_group)); 13039566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&omp_group)); 130467602552SJunchao Zhang } 130567602552SJunchao Zhang 130667602552SJunchao Zhang /* Realloc buffers when current nrhs is bigger than what we have met */ 130767602552SJunchao Zhang if (nrhs > mumps->max_nrhs) { 13089566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 13099566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, 
&mumps->rhs_recvbuf)); 131067602552SJunchao Zhang mumps->max_nrhs = nrhs; 131167602552SJunchao Zhang } 131267602552SJunchao Zhang 131367602552SJunchao Zhang /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 13149566063dSJacob Faibussowitsch for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 131567602552SJunchao Zhang mumps->rhs_disps[0] = 0; 131667602552SJunchao Zhang for (j = 1; j < ompsize; j++) { 131767602552SJunchao Zhang mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 131808401ef6SPierre Jolivet PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 131967602552SJunchao Zhang } 132067602552SJunchao Zhang recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 132167602552SJunchao Zhang } 132267602552SJunchao Zhang 13239566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 13249566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 132567602552SJunchao Zhang 132667602552SJunchao Zhang if (mumps->is_omp_master) { 132767602552SJunchao Zhang if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 132867602552SJunchao Zhang PetscScalar *dst, *dstbase = mumps->rhs_loc; 132967602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 133067602552SJunchao Zhang const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 133167602552SJunchao Zhang dst = dstbase; 133267602552SJunchao Zhang for (i = 0; i < nrhs; i++) { 13339566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 133467602552SJunchao Zhang src += mumps->rhs_nrow[j]; 133567602552SJunchao Zhang dst += mumps->nloc_rhs; 
133667602552SJunchao Zhang } 133767602552SJunchao Zhang dstbase += mumps->rhs_nrow[j]; 133867602552SJunchao Zhang } 133967602552SJunchao Zhang } 134067602552SJunchao Zhang mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 134167602552SJunchao Zhang } 134267602552SJunchao Zhang #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 134367602552SJunchao Zhang } 134467602552SJunchao Zhang mumps->id.nrhs = nrhs; 134567602552SJunchao Zhang mumps->id.nloc_rhs = mumps->nloc_rhs; 134667602552SJunchao Zhang mumps->id.lrhs_loc = mumps->nloc_rhs; 134767602552SJunchao Zhang mumps->id.irhs_loc = mumps->irhs_loc; 13483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 134967602552SJunchao Zhang } 135067602552SJunchao Zhang 1351d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1352d71ae5a4SJacob Faibussowitsch { 1353e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 135425aac85cSJunchao Zhang const PetscScalar *rarray = NULL; 1355d54de34fSKris Buschelman PetscScalar *array; 1356329ec9b3SHong Zhang IS is_iden, is_petsc; 1357329ec9b3SHong Zhang PetscInt i; 1358cc86f929SStefano Zampini PetscBool second_solve = PETSC_FALSE; 1359883f2eb9SBarry Smith static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1360397b6df1SKris Buschelman 1361397b6df1SKris Buschelman PetscFunctionBegin; 13629371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 13639371c9d4SSatish Balay "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 13649371c9d4SSatish Balay &cite1)); 13659371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 13669371c9d4SSatish Balay "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 13679371c9d4SSatish Balay &cite2)); 13682aca8efcSHong Zhang 1369603e8f96SBarry Smith if (A->factorerrortype) { 13709566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 13719566063dSJacob Faibussowitsch PetscCall(VecSetInf(x)); 13723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13732aca8efcSHong Zhang } 13742aca8efcSHong Zhang 1375a5e57a09SHong Zhang mumps->id.nrhs = 1; 13762d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 137725aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 137867602552SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 13799566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(b, &rarray)); 13809566063dSJacob Faibussowitsch PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 138125aac85cSJunchao Zhang } else { 138241ffd417SStefano Zampini mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 13839566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 13849566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 138567602552SJunchao Zhang if (!mumps->myid) { 13869566063dSJacob Faibussowitsch PetscCall(VecGetArray(mumps->b_seq, &array)); 138767602552SJunchao Zhang mumps->id.rhs = (MumpsScalar *)array; 138867602552SJunchao Zhang } 138925aac85cSJunchao Zhang } 13903ab56b82SJunchao Zhang } else { /* petsc_size == 1 */ 139167602552SJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 13929566063dSJacob Faibussowitsch PetscCall(VecCopy(b, x)); 13939566063dSJacob Faibussowitsch 
PetscCall(VecGetArray(x, &array)); 1394940cd9d6SSatish Balay mumps->id.rhs = (MumpsScalar *)array; 1395397b6df1SKris Buschelman } 1396397b6df1SKris Buschelman 1397cc86f929SStefano Zampini /* 1398cc86f929SStefano Zampini handle condensation step of Schur complement (if any) 1399cc86f929SStefano Zampini We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1400cc86f929SStefano Zampini According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1401cc86f929SStefano Zampini Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1402cc86f929SStefano Zampini This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1403cc86f929SStefano Zampini */ 1404583f777eSStefano Zampini if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 140508401ef6SPierre Jolivet PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1406cc86f929SStefano Zampini second_solve = PETSC_TRUE; 14079566063dSJacob Faibussowitsch PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1408cc86f929SStefano Zampini } 1409397b6df1SKris Buschelman /* solve phase */ 1410a5e57a09SHong Zhang mumps->id.job = JOB_SOLVE; 14113ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 141208401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1413397b6df1SKris Buschelman 1414b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 14151baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1416b5fa320bSStefano Zampini 14172d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1418a5e57a09SHong Zhang if (mumps->scat_sol && mumps->ICNTL9_pre 
!= mumps->id.ICNTL(9)) { 1419a5e57a09SHong Zhang /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 14209566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1421397b6df1SKris Buschelman } 1422a5e57a09SHong Zhang if (!mumps->scat_sol) { /* create scatter scat_sol */ 1423a6053eceSJunchao Zhang PetscInt *isol2_loc = NULL; 14249566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 14259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1426a6053eceSJunchao Zhang for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 14279566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 14289566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 14299566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_iden)); 14309566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_petsc)); 1431a5e57a09SHong Zhang mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1432397b6df1SKris Buschelman } 1433a5e57a09SHong Zhang 14349566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 14359566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1436329ec9b3SHong Zhang } 1437353d7d71SJunchao Zhang 143867602552SJunchao Zhang if (mumps->petsc_size > 1) { 143925aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 14409566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(b, &rarray)); 144125aac85cSJunchao Zhang } else if (!mumps->myid) { 14429566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mumps->b_seq, &array)); 144325aac85cSJunchao Zhang } 
14449566063dSJacob Faibussowitsch } else PetscCall(VecRestoreArray(x, &array)); 1445353d7d71SJunchao Zhang 144664412097SPierre Jolivet PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 14473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1448397b6df1SKris Buschelman } 1449397b6df1SKris Buschelman 1450d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1451d71ae5a4SJacob Faibussowitsch { 1452e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1453338d3105SPierre Jolivet const PetscMUMPSInt value = mumps->id.ICNTL(9); 145451d5961aSHong Zhang 145551d5961aSHong Zhang PetscFunctionBegin; 1456a5e57a09SHong Zhang mumps->id.ICNTL(9) = 0; 14579566063dSJacob Faibussowitsch PetscCall(MatSolve_MUMPS(A, b, x)); 1458338d3105SPierre Jolivet mumps->id.ICNTL(9) = value; 14593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 146051d5961aSHong Zhang } 146151d5961aSHong Zhang 1462d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1463d71ae5a4SJacob Faibussowitsch { 1464b8491c3eSStefano Zampini Mat Bt = NULL; 1465a6053eceSJunchao Zhang PetscBool denseX, denseB, flg, flgT; 1466e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1467334c5f61SHong Zhang PetscInt i, nrhs, M; 14681683a169SBarry Smith PetscScalar *array; 14691683a169SBarry Smith const PetscScalar *rbray; 1470a6053eceSJunchao Zhang PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1471a6053eceSJunchao Zhang PetscMUMPSInt *isol_loc, *isol_loc_save; 14721683a169SBarry Smith PetscScalar *bray, *sol_loc, *sol_loc_save; 1473be818407SHong Zhang IS is_to, is_from; 1474beae5ec0SHong Zhang PetscInt k, proc, j, m, myrstart; 1475be818407SHong Zhang const PetscInt *rstart; 147667602552SJunchao Zhang Vec v_mpi, msol_loc; 147767602552SJunchao Zhang VecScatter scat_sol; 147867602552SJunchao Zhang Vec b_seq; 147967602552SJunchao Zhang 
VecScatter scat_rhs; 1480be818407SHong Zhang PetscScalar *aa; 1481be818407SHong Zhang PetscInt spnr, *ia, *ja; 1482d56c302dSHong Zhang Mat_MPIAIJ *b = NULL; 1483bda8bf91SBarry Smith 1484e0b74bf9SHong Zhang PetscFunctionBegin; 14859566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 148628b400f6SJacob Faibussowitsch PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1487be818407SHong Zhang 14889566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1489a6053eceSJunchao Zhang if (denseB) { 149008401ef6SPierre Jolivet PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1491be818407SHong Zhang mumps->id.ICNTL(20) = 0; /* dense RHS */ 14920e6b8875SHong Zhang } else { /* sparse B */ 149308401ef6SPierre Jolivet PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1494013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1495da81f932SPierre Jolivet if (flgT) { /* input B is transpose of actual RHS matrix, 14960e6b8875SHong Zhang because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 14979566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(B, &Bt)); 1498013e2dc7SBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1499be818407SHong Zhang mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1500b8491c3eSStefano Zampini } 150187b22cf4SHong Zhang 15029566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &M, &nrhs)); 15039481e6e9SHong Zhang mumps->id.nrhs = nrhs; 15049481e6e9SHong Zhang mumps->id.lrhs = M; 15052b691707SHong Zhang mumps->id.rhs = NULL; 15069481e6e9SHong Zhang 15072d4298aeSJunchao Zhang if (mumps->petsc_size == 1) { 1508b8491c3eSStefano Zampini PetscScalar *aa; 1509b8491c3eSStefano Zampini PetscInt spnr, *ia, *ja; 1510e94cce23SStefano Zampini PetscBool second_solve = PETSC_FALSE; 1511b8491c3eSStefano Zampini 15129566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 1513b8491c3eSStefano Zampini mumps->id.rhs = (MumpsScalar *)array; 15142b691707SHong Zhang 1515a6053eceSJunchao Zhang if (denseB) { 15162b691707SHong Zhang /* copy B to X */ 15179566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &rbray)); 15189566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 15199566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 15202b691707SHong Zhang } else { /* sparse B */ 15219566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Bt, &aa)); 15229566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 152328b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 15249566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1525b8491c3eSStefano Zampini mumps->id.rhs_sparse = (MumpsScalar *)aa; 1526b8491c3eSStefano Zampini } 
1527e94cce23SStefano Zampini /* handle condensation step of Schur complement (if any) */ 1528583f777eSStefano Zampini if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1529e94cce23SStefano Zampini second_solve = PETSC_TRUE; 15309566063dSJacob Faibussowitsch PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1531e94cce23SStefano Zampini } 15322cd7d884SHong Zhang /* solve phase */ 15332cd7d884SHong Zhang mumps->id.job = JOB_SOLVE; 15343ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 153508401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1536b5fa320bSStefano Zampini 1537b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 15381baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1539a6053eceSJunchao Zhang if (!denseB) { /* sparse B */ 15409566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 15419566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 154228b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1543b8491c3eSStefano Zampini } 15449566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 15453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1546be818407SHong Zhang } 1547801fbe65SHong Zhang 15482ef1f0ffSBarry Smith /* parallel case: MUMPS requires rhs B to be centralized on the host! 
*/ 1549aed4548fSBarry Smith PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1550241dbb5eSStefano Zampini 1551beae5ec0SHong Zhang /* create msol_loc to hold mumps local solution */ 15521683a169SBarry Smith isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 15531683a169SBarry Smith sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1554801fbe65SHong Zhang 1555a1dfcbd9SJunchao Zhang lsol_loc = mumps->id.lsol_loc; 155671aed81dSHong Zhang nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 15579566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1558940cd9d6SSatish Balay mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1559801fbe65SHong Zhang mumps->id.isol_loc = isol_loc; 1560801fbe65SHong Zhang 15619566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 15622cd7d884SHong Zhang 156367602552SJunchao Zhang if (denseB) { 156425aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 156567602552SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 15669566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &rbray)); 15679566063dSJacob Faibussowitsch PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 15689566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 15699566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 15709566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 157125aac85cSJunchao Zhang } else { 157225aac85cSJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 157380577c12SJunchao Zhang /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 157480577c12SJunchao Zhang very inefficient communication. 
An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 157580577c12SJunchao Zhang 0, re-arrange B into desired order, which is a local operation. 157680577c12SJunchao Zhang */ 157780577c12SJunchao Zhang 157867602552SJunchao Zhang /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1579be818407SHong Zhang /* wrap dense rhs matrix B into a vector v_mpi */ 15809566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 15819566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(B, &bray)); 15829566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 15839566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(B, &bray)); 15842b691707SHong Zhang 1585be818407SHong Zhang /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1586801fbe65SHong Zhang if (!mumps->myid) { 1587beae5ec0SHong Zhang PetscInt *idx; 1588beae5ec0SHong Zhang /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 15899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrhs * M, &idx)); 15909566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(B, &rstart)); 1591be818407SHong Zhang k = 0; 15922d4298aeSJunchao Zhang for (proc = 0; proc < mumps->petsc_size; proc++) { 1593be818407SHong Zhang for (j = 0; j < nrhs; j++) { 1594beae5ec0SHong Zhang for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1595be818407SHong Zhang } 1596be818407SHong Zhang } 1597be818407SHong Zhang 15989566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 15999566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 16009566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1601801fbe65SHong Zhang } else { 16029566063dSJacob Faibussowitsch 
PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 16039566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 16049566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1605801fbe65SHong Zhang } 16069566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 16079566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 16089566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 16099566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 16109566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1611801fbe65SHong Zhang 1612801fbe65SHong Zhang if (!mumps->myid) { /* define rhs on the host */ 16139566063dSJacob Faibussowitsch PetscCall(VecGetArray(b_seq, &bray)); 1614940cd9d6SSatish Balay mumps->id.rhs = (MumpsScalar *)bray; 16159566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b_seq, &bray)); 1616801fbe65SHong Zhang } 161725aac85cSJunchao Zhang } 16182b691707SHong Zhang } else { /* sparse B */ 16192b691707SHong Zhang b = (Mat_MPIAIJ *)Bt->data; 16202b691707SHong Zhang 1621be818407SHong Zhang /* wrap dense X into a vector v_mpi */ 16229566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(X, &m, NULL)); 16239566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &bray)); 16249566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 16259566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &bray)); 16262b691707SHong Zhang 16272b691707SHong Zhang if (!mumps->myid) { 16289566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(b->A, &aa)); 16299566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 163028b400f6SJacob 
Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 16319566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 16322b691707SHong Zhang mumps->id.rhs_sparse = (MumpsScalar *)aa; 16332b691707SHong Zhang } else { 16342b691707SHong Zhang mumps->id.irhs_ptr = NULL; 16352b691707SHong Zhang mumps->id.irhs_sparse = NULL; 16362b691707SHong Zhang mumps->id.nz_rhs = 0; 16372b691707SHong Zhang mumps->id.rhs_sparse = NULL; 16382b691707SHong Zhang } 16392b691707SHong Zhang } 16402b691707SHong Zhang 1641801fbe65SHong Zhang /* solve phase */ 1642801fbe65SHong Zhang mumps->id.job = JOB_SOLVE; 16433ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 164408401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1645801fbe65SHong Zhang 1646334c5f61SHong Zhang /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 16479566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 16489566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(v_mpi, array)); 1649801fbe65SHong Zhang 1650334c5f61SHong Zhang /* create scatter scat_sol */ 16519566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(X, &rstart)); 1652beae5ec0SHong Zhang /* iidx: index for scatter mumps solution to petsc X */ 1653beae5ec0SHong Zhang 16549566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 16559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1656beae5ec0SHong Zhang for (i = 0; i < lsol_loc; i++) { 1657beae5ec0SHong Zhang isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1658beae5ec0SHong Zhang 16592d4298aeSJunchao Zhang for (proc = 0; proc < mumps->petsc_size; proc++) { 1660beae5ec0SHong Zhang if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1661beae5ec0SHong Zhang myrstart = rstart[proc]; 1662beae5ec0SHong Zhang k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1663beae5ec0SHong Zhang iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1664beae5ec0SHong Zhang m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1665beae5ec0SHong Zhang break; 1666be818407SHong Zhang } 1667be818407SHong Zhang } 1668be818407SHong Zhang 1669beae5ec0SHong Zhang for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1670801fbe65SHong Zhang } 16719566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 16729566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 16739566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 16749566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 16759566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 16769566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 16779566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 167871aed81dSHong Zhang 167971aed81dSHong Zhang /* free spaces */ 16801683a169SBarry Smith mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 168171aed81dSHong Zhang mumps->id.isol_loc = isol_loc_save; 168271aed81dSHong Zhang 16839566063dSJacob Faibussowitsch PetscCall(PetscFree2(sol_loc, isol_loc)); 16849566063dSJacob Faibussowitsch PetscCall(PetscFree(idxx)); 16859566063dSJacob Faibussowitsch PetscCall(VecDestroy(&msol_loc)); 16869566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v_mpi)); 
1687a6053eceSJunchao Zhang if (!denseB) { 16882b691707SHong Zhang if (!mumps->myid) { 1689d56c302dSHong Zhang b = (Mat_MPIAIJ *)Bt->data; 16909566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 16919566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 169228b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 16932b691707SHong Zhang } 16942b691707SHong Zhang } else { 169525aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { 16969566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b_seq)); 16979566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_rhs)); 169825aac85cSJunchao Zhang } 16992b691707SHong Zhang } 17009566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_sol)); 170164412097SPierre Jolivet PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 17023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1703e0b74bf9SHong Zhang } 1704e0b74bf9SHong Zhang 1705d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1706d71ae5a4SJacob Faibussowitsch { 1707b18964edSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1708338d3105SPierre Jolivet const PetscMUMPSInt value = mumps->id.ICNTL(9); 1709b18964edSHong Zhang 1710b18964edSHong Zhang PetscFunctionBegin; 1711b18964edSHong Zhang mumps->id.ICNTL(9) = 0; 1712b18964edSHong Zhang PetscCall(MatMatSolve_MUMPS(A, B, X)); 1713338d3105SPierre Jolivet mumps->id.ICNTL(9) = value; 17143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1715b18964edSHong Zhang } 1716b18964edSHong Zhang 1717d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1718d71ae5a4SJacob Faibussowitsch { 1719eb3ef3b2SHong Zhang PetscBool flg; 1720eb3ef3b2SHong Zhang 
Mat B; 1721eb3ef3b2SHong Zhang 1722eb3ef3b2SHong Zhang PetscFunctionBegin; 17239566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 172428b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1725eb3ef3b2SHong Zhang 1726eb3ef3b2SHong Zhang /* Create B=Bt^T that uses Bt's data structure */ 17279566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(Bt, &B)); 1728eb3ef3b2SHong Zhang 17299566063dSJacob Faibussowitsch PetscCall(MatMatSolve_MUMPS(A, B, X)); 17309566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 17313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1732eb3ef3b2SHong Zhang } 1733eb3ef3b2SHong Zhang 1734ace3df97SHong Zhang #if !defined(PETSC_USE_COMPLEX) 1735a58c3f20SHong Zhang /* 1736a58c3f20SHong Zhang input: 1737a58c3f20SHong Zhang F: numeric factor 1738a58c3f20SHong Zhang output: 1739a58c3f20SHong Zhang nneg: total number of negative pivots 174019d49a3bSHong Zhang nzero: total number of zero pivots 174119d49a3bSHong Zhang npos: (global dimension of F) - nneg - nzero 1742a58c3f20SHong Zhang */ 1743d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1744d71ae5a4SJacob Faibussowitsch { 1745e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1746c1490034SHong Zhang PetscMPIInt size; 1747a58c3f20SHong Zhang 1748a58c3f20SHong Zhang PetscFunctionBegin; 17499566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1750bcb30aebSHong Zhang /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1751aed4548fSBarry Smith PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1752ed85ac9fSHong Zhang 1753710ac8efSHong Zhang if (nneg) *nneg = mumps->id.INFOG(12); 1754ed85ac9fSHong Zhang if (nzero || npos) { 175508401ef6SPierre Jolivet PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1756710ac8efSHong Zhang if (nzero) *nzero = mumps->id.INFOG(28); 1757710ac8efSHong Zhang if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1758a58c3f20SHong Zhang } 17593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1760a58c3f20SHong Zhang } 176119d49a3bSHong Zhang #endif 1762a58c3f20SHong Zhang 1763d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1764d71ae5a4SJacob Faibussowitsch { 1765a6053eceSJunchao Zhang PetscInt i, nreqs; 1766a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; 1767a6053eceSJunchao Zhang PetscMPIInt count; 1768a6053eceSJunchao Zhang PetscInt64 totnnz, remain; 1769a6053eceSJunchao Zhang const PetscInt osize = mumps->omp_comm_size; 1770a6053eceSJunchao Zhang PetscScalar *val; 17713ab56b82SJunchao Zhang 17723ab56b82SJunchao Zhang PetscFunctionBegin; 1773a6053eceSJunchao Zhang if (osize > 1) { 17743ab56b82SJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) { 17753ab56b82SJunchao Zhang /* master first gathers counts of nonzeros to receive */ 17769566063dSJacob Faibussowitsch if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 17779566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 17783ab56b82SJunchao Zhang 1779a6053eceSJunchao Zhang /* Then each computes number of send/recvs */ 17803ab56b82SJunchao Zhang if (mumps->is_omp_master) { 1781a6053eceSJunchao Zhang /* Start from 1 since self communication is not done in MPI */ 
1782a6053eceSJunchao Zhang nreqs = 0; 1783a6053eceSJunchao Zhang for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1784a6053eceSJunchao Zhang } else { 1785a6053eceSJunchao Zhang nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 17863ab56b82SJunchao Zhang } 178735cb6cd3SPierre Jolivet PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 17883ab56b82SJunchao Zhang 1789a6053eceSJunchao Zhang /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1790a6053eceSJunchao Zhang MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1791a6053eceSJunchao Zhang might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1792a6053eceSJunchao Zhang is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1793a6053eceSJunchao Zhang */ 1794a6053eceSJunchao Zhang nreqs = 0; /* counter for actual send/recvs */ 17953ab56b82SJunchao Zhang if (mumps->is_omp_master) { 1796a6053eceSJunchao Zhang for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 17979566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 17989566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(totnnz, &val)); 1799a6053eceSJunchao Zhang 1800a6053eceSJunchao Zhang /* Self communication */ 18019566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 18029566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 18039566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1804a6053eceSJunchao Zhang 1805a6053eceSJunchao Zhang /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 18069566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->irn, 
mumps->jcn)); 18079566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->val_alloc)); 1808a6053eceSJunchao Zhang mumps->nnz = totnnz; 18093ab56b82SJunchao Zhang mumps->irn = irn; 18103ab56b82SJunchao Zhang mumps->jcn = jcn; 1811a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 1812a6053eceSJunchao Zhang 1813a6053eceSJunchao Zhang irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1814a6053eceSJunchao Zhang jcn += mumps->recvcount[0]; 1815a6053eceSJunchao Zhang val += mumps->recvcount[0]; 1816a6053eceSJunchao Zhang 1817a6053eceSJunchao Zhang /* Remote communication */ 1818a6053eceSJunchao Zhang for (i = 1; i < osize; i++) { 1819a6053eceSJunchao Zhang count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1820a6053eceSJunchao Zhang remain = mumps->recvcount[i] - count; 1821a6053eceSJunchao Zhang while (count > 0) { 18229566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 18239566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 18249566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1825a6053eceSJunchao Zhang irn += count; 1826a6053eceSJunchao Zhang jcn += count; 1827a6053eceSJunchao Zhang val += count; 1828a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1829a6053eceSJunchao Zhang remain -= count; 1830a6053eceSJunchao Zhang } 18313ab56b82SJunchao Zhang } 18323ab56b82SJunchao Zhang } else { 1833a6053eceSJunchao Zhang irn = mumps->irn; 1834a6053eceSJunchao Zhang jcn = mumps->jcn; 1835a6053eceSJunchao Zhang val = mumps->val; 1836a6053eceSJunchao Zhang count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1837a6053eceSJunchao Zhang remain = mumps->nnz - count; 1838a6053eceSJunchao Zhang while (count > 0) { 18399566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(irn, 
count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 18409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 18419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1842a6053eceSJunchao Zhang irn += count; 1843a6053eceSJunchao Zhang jcn += count; 1844a6053eceSJunchao Zhang val += count; 1845a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1846a6053eceSJunchao Zhang remain -= count; 18473ab56b82SJunchao Zhang } 18483ab56b82SJunchao Zhang } 1849a6053eceSJunchao Zhang } else { 1850a6053eceSJunchao Zhang nreqs = 0; 1851a6053eceSJunchao Zhang if (mumps->is_omp_master) { 1852a6053eceSJunchao Zhang val = mumps->val + mumps->recvcount[0]; 1853a6053eceSJunchao Zhang for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1854a6053eceSJunchao Zhang count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1855a6053eceSJunchao Zhang remain = mumps->recvcount[i] - count; 1856a6053eceSJunchao Zhang while (count > 0) { 18579566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1858a6053eceSJunchao Zhang val += count; 1859a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1860a6053eceSJunchao Zhang remain -= count; 1861a6053eceSJunchao Zhang } 1862a6053eceSJunchao Zhang } 1863a6053eceSJunchao Zhang } else { 1864a6053eceSJunchao Zhang val = mumps->val; 1865a6053eceSJunchao Zhang count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1866a6053eceSJunchao Zhang remain = mumps->nnz - count; 1867a6053eceSJunchao Zhang while (count > 0) { 18689566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1869a6053eceSJunchao Zhang val += count; 1870a6053eceSJunchao Zhang 
count = PetscMin(remain, PETSC_MPI_INT_MAX); 1871a6053eceSJunchao Zhang remain -= count; 1872a6053eceSJunchao Zhang } 1873a6053eceSJunchao Zhang } 1874a6053eceSJunchao Zhang } 18759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1876a6053eceSJunchao Zhang mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1877a6053eceSJunchao Zhang } 18783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18793ab56b82SJunchao Zhang } 18803ab56b82SJunchao Zhang 1881d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info) 1882d71ae5a4SJacob Faibussowitsch { 1883e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1884ace3abfcSBarry Smith PetscBool isMPIAIJ; 1885397b6df1SKris Buschelman 1886397b6df1SKris Buschelman PetscFunctionBegin; 1887dbf6bb8dSprj- if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 188848a46eb9SPierre Jolivet if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 18899566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 18903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18912aca8efcSHong Zhang } 18926baea169SHong Zhang 18939566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 18949566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1895397b6df1SKris Buschelman 1896397b6df1SKris Buschelman /* numerical factorization phase */ 1897a5e57a09SHong Zhang mumps->id.job = JOB_FACTNUMERIC; 18984e34a73bSHong Zhang if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1899ad540459SPierre Jolivet if (!mumps->myid) mumps->id.a = 
(MumpsScalar *)mumps->val; 1900397b6df1SKris Buschelman } else { 1901940cd9d6SSatish Balay mumps->id.a_loc = (MumpsScalar *)mumps->val; 1902397b6df1SKris Buschelman } 19033ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 1904a5e57a09SHong Zhang if (mumps->id.INFOG(1) < 0) { 19057a46b595SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 1906c0d63f2fSHong Zhang if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 19079566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1908603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1909c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -13) { 19109566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1911603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1912c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 19139566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1914603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 19152aca8efcSHong Zhang } else { 19169566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1917603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 1918151787a6SHong Zhang } 19192aca8efcSHong Zhang } 1920aed4548fSBarry Smith PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " 
mumps->id.ICNTL(16):=%d", mumps->id.INFOG(16)); 1921397b6df1SKris Buschelman 1922b3cb21ddSStefano Zampini F->assembled = PETSC_TRUE; 1923d47f36abSHong Zhang 1924b3cb21ddSStefano Zampini if (F->schur) { /* reset Schur status to unfactored */ 19253cb7dd0eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 1926c70f7ee4SJunchao Zhang F->schur->offloadmask = PETSC_OFFLOAD_CPU; 19273cb7dd0eSStefano Zampini #endif 1928b3cb21ddSStefano Zampini if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1929b3cb21ddSStefano Zampini mumps->id.ICNTL(19) = 2; 19309566063dSJacob Faibussowitsch PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1931b3cb21ddSStefano Zampini } 19329566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1933b3cb21ddSStefano Zampini } 193467877ebaSShri Abhyankar 1935066565c5SStefano Zampini /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1936066565c5SStefano Zampini if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1937066565c5SStefano Zampini 19383ab56b82SJunchao Zhang if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 19392d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 194067877ebaSShri Abhyankar PetscInt lsol_loc; 194167877ebaSShri Abhyankar PetscScalar *sol_loc; 19422205254eSKarl Rupp 19439566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 1944c2093ab7SHong Zhang 1945c2093ab7SHong Zhang /* distributed solution; Create x_seq=sol_loc for repeated use */ 1946c2093ab7SHong Zhang if (mumps->x_seq) { 19479566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 19489566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 19499566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 1950c2093ab7SHong Zhang } 1951a5e57a09SHong Zhang lsol_loc = mumps->id.INFO(23); /* length of 
sol_loc */ 19529566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1953a5e57a09SHong Zhang mumps->id.lsol_loc = lsol_loc; 1954940cd9d6SSatish Balay mumps->id.sol_loc = (MumpsScalar *)sol_loc; 19559566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 195667877ebaSShri Abhyankar } 19579566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 19583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1959397b6df1SKris Buschelman } 1960397b6df1SKris Buschelman 19619a2535b5SHong Zhang /* Sets MUMPS options from the options database */ 1962d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1963d71ae5a4SJacob Faibussowitsch { 1964e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1965413bcc21SPierre Jolivet PetscMUMPSInt icntl = 0, size, *listvar_schur; 196645e3843bSPierre Jolivet PetscInt info[80], i, ninfo = 80, rbs, cbs; 1967413bcc21SPierre Jolivet PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1968413bcc21SPierre Jolivet MumpsScalar *arr; 1969dcd589f8SShri Abhyankar 1970dcd589f8SShri Abhyankar PetscFunctionBegin; 197126cc229bSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1972413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1973413bcc21SPierre Jolivet PetscInt nthreads = 0; 1974413bcc21SPierre Jolivet PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1975413bcc21SPierre Jolivet PetscInt nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 1976413bcc21SPierre Jolivet 1977413bcc21SPierre Jolivet mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1978413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1979413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1980413bcc21SPierre Jolivet 1981413bcc21SPierre Jolivet PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1982413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1983413bcc21SPierre Jolivet /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1984413bcc21SPierre Jolivet PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1985413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) { 19869371c9d4SSatish Balay PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 19879371c9d4SSatish Balay ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1988413bcc21SPierre Jolivet PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 1989413bcc21SPierre Jolivet #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1990413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1991413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1992413bcc21SPierre Jolivet #endif 1993413bcc21SPierre Jolivet } else { 1994413bcc21SPierre Jolivet mumps->omp_comm = PETSC_COMM_SELF; 1995413bcc21SPierre Jolivet mumps->mumps_comm = mumps->petsc_comm; 1996413bcc21SPierre Jolivet mumps->is_omp_master = PETSC_TRUE; 1997413bcc21SPierre Jolivet } 1998413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1999413bcc21SPierre Jolivet mumps->reqs = NULL; 2000413bcc21SPierre Jolivet mumps->tag = 0; 2001413bcc21SPierre Jolivet 2002413bcc21SPierre Jolivet if (mumps->mumps_comm != MPI_COMM_NULL) { 2003413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2004413bcc21SPierre Jolivet /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 2005413bcc21SPierre Jolivet MPI_Comm comm; 2006413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2007413bcc21SPierre Jolivet mumps->mumps_comm = comm; 2008413bcc21SPierre Jolivet } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2009413bcc21SPierre Jolivet } 2010413bcc21SPierre Jolivet 2011413bcc21SPierre Jolivet mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2012413bcc21SPierre Jolivet mumps->id.job = JOB_INIT; 2013413bcc21SPierre Jolivet mumps->id.par = 1; /* host participates factorizaton and solve */ 2014413bcc21SPierre Jolivet mumps->id.sym = mumps->sym; 2015413bcc21SPierre Jolivet 2016413bcc21SPierre Jolivet size = mumps->id.size_schur; 2017413bcc21SPierre Jolivet arr = mumps->id.schur; 2018413bcc21SPierre Jolivet listvar_schur = mumps->id.listvar_schur; 2019413bcc21SPierre Jolivet PetscMUMPS_c(mumps); 2020413bcc21SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 2021413bcc21SPierre Jolivet /* restore cached ICNTL and CNTL values */ 2022413bcc21SPierre Jolivet for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2023413bcc21SPierre Jolivet for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2024413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 2025413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 2026413bcc21SPierre Jolivet 2027413bcc21SPierre Jolivet if (schur) { 2028413bcc21SPierre Jolivet mumps->id.size_schur = size; 2029413bcc21SPierre Jolivet mumps->id.schur_lld = size; 2030413bcc21SPierre Jolivet mumps->id.schur = arr; 2031413bcc21SPierre Jolivet mumps->id.listvar_schur = listvar_schur; 2032413bcc21SPierre Jolivet if (mumps->petsc_size > 1) { 2033413bcc21SPierre Jolivet PetscBool gs; /* gs is false if any rank 
other than root has non-empty IS */ 2034413bcc21SPierre Jolivet 2035413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2036413bcc21SPierre Jolivet gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2037712fec58SPierre Jolivet PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2038413bcc21SPierre Jolivet PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2039413bcc21SPierre Jolivet } else { 2040413bcc21SPierre Jolivet if (F->factortype == MAT_FACTOR_LU) { 2041413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2042413bcc21SPierre Jolivet } else { 2043413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2044413bcc21SPierre Jolivet } 2045413bcc21SPierre Jolivet } 2046413bcc21SPierre Jolivet mumps->id.ICNTL(26) = -1; 2047413bcc21SPierre Jolivet } 2048413bcc21SPierre Jolivet 2049413bcc21SPierre Jolivet /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2050413bcc21SPierre Jolivet For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
2051413bcc21SPierre Jolivet */ 2052413bcc21SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2053413bcc21SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2054413bcc21SPierre Jolivet 2055413bcc21SPierre Jolivet mumps->scat_rhs = NULL; 2056413bcc21SPierre Jolivet mumps->scat_sol = NULL; 2057413bcc21SPierre Jolivet 2058413bcc21SPierre Jolivet /* set PETSc-MUMPS default options - override MUMPS default */ 2059413bcc21SPierre Jolivet mumps->id.ICNTL(3) = 0; 2060413bcc21SPierre Jolivet mumps->id.ICNTL(4) = 0; 2061413bcc21SPierre Jolivet if (mumps->petsc_size == 1) { 2062413bcc21SPierre Jolivet mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2063413bcc21SPierre Jolivet mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2064413bcc21SPierre Jolivet } else { 2065413bcc21SPierre Jolivet mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2066413bcc21SPierre Jolivet mumps->id.ICNTL(21) = 1; /* distributed solution */ 2067413bcc21SPierre Jolivet } 2068413bcc21SPierre Jolivet } 20699566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 20709a2535b5SHong Zhang if (flg) mumps->id.ICNTL(1) = icntl; 20719566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 20729a2535b5SHong Zhang if (flg) mumps->id.ICNTL(2) = icntl; 20739566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 20749a2535b5SHong Zhang if (flg) mumps->id.ICNTL(3) = icntl; 2075dcd589f8SShri Abhyankar 20769566063dSJacob Faibussowitsch 
PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 20779a2535b5SHong Zhang if (flg) mumps->id.ICNTL(4) = icntl; 20789a2535b5SHong Zhang if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 20799a2535b5SHong Zhang 20809566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 20819a2535b5SHong Zhang if (flg) mumps->id.ICNTL(6) = icntl; 20829a2535b5SHong Zhang 20839566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2084dcd589f8SShri Abhyankar if (flg) { 2085aed4548fSBarry Smith PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2086b53c1a7fSBarry Smith mumps->id.ICNTL(7) = icntl; 2087dcd589f8SShri Abhyankar } 2088e0b74bf9SHong Zhang 20899566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 20909566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 20919566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 20929566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an 
error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 20939566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 20949566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 20959566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 209645e3843bSPierre Jolivet PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 209745e3843bSPierre Jolivet if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 209845e3843bSPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 209945e3843bSPierre Jolivet if (flg) { 210045e3843bSPierre Jolivet PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 210145e3843bSPierre Jolivet PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 210245e3843bSPierre Jolivet } 21039566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 210459ac8732SStefano Zampini if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 21059566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F->schur)); 21069566063dSJacob Faibussowitsch 
PetscCall(MatMumpsResetSchur_Private(mumps)); 210759ac8732SStefano Zampini } 210825aac85cSJunchao Zhang 210943f3b051SJunchao Zhang /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 211043f3b051SJunchao Zhang and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 211125aac85cSJunchao Zhang and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 211243f3b051SJunchao Zhang This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 211343f3b051SJunchao Zhang see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 211443f3b051SJunchao Zhang In short, we could not use distributed RHS with MPICH until v4.0b1. 211525aac85cSJunchao Zhang */ 211643f3b051SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 211725aac85cSJunchao Zhang mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 211843f3b051SJunchao Zhang #else 211943f3b051SJunchao Zhang mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 212025aac85cSJunchao Zhang #endif 21219566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2122aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10", (int)mumps->ICNTL20); 212325aac85cSJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2124aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 212525aac85cSJunchao Zhang #endif 21269566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 21279a2535b5SHong Zhang 21289566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 21299566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 21309566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 21319371c9d4SSatish Balay if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2132d7ebd59bSHong Zhang 21339566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 21349566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 21359566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple 
right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 21369566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 21379566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 21389566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 21399566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 21409566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 21419566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 21429566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 21439566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 21449566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated 
compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2145dcd589f8SShri Abhyankar 21469566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 21479566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 21489566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 21499566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 21509566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 21519566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2152e5bb22a1SHong Zhang 21539566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2154b34f08ffSHong Zhang 21559566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2156b34f08ffSHong Zhang if (ninfo) { 215708401ef6SPierre Jolivet PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 21589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2159b34f08ffSHong Zhang mumps->ninfo = ninfo; 2160b34f08ffSHong Zhang for (i = 0; i < ninfo; i++) { 2161aed4548fSBarry Smith 
PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2162f7d195e4SLawrence Mitchell mumps->info[i] = info[i]; 2163b34f08ffSHong Zhang } 2164b34f08ffSHong Zhang } 2165d0609cedSBarry Smith PetscOptionsEnd(); 21663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2167dcd589f8SShri Abhyankar } 2168dcd589f8SShri Abhyankar 2169d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2170d71ae5a4SJacob Faibussowitsch { 21715cd7cf9dSHong Zhang PetscFunctionBegin; 21725cd7cf9dSHong Zhang if (mumps->id.INFOG(1) < 0) { 21737a46b595SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1)); 21745cd7cf9dSHong Zhang if (mumps->id.INFOG(1) == -6) { 21759566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2176603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 21775cd7cf9dSHong Zhang } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 21789566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2179603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2180dbf6bb8dSprj- } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 21819566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "Empty matrix\n")); 21825cd7cf9dSHong Zhang } else { 21839566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2184603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 21855cd7cf9dSHong Zhang } 21865cd7cf9dSHong Zhang } 21873ba16761SJacob Faibussowitsch 
PetscFunctionReturn(PETSC_SUCCESS); 21885cd7cf9dSHong Zhang } 21895cd7cf9dSHong Zhang 2190d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2191d71ae5a4SJacob Faibussowitsch { 2192e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 219367877ebaSShri Abhyankar Vec b; 219467877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2195397b6df1SKris Buschelman 2196397b6df1SKris Buschelman PetscFunctionBegin; 2197d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2198d47f36abSHong Zhang /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 21993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2200d47f36abSHong Zhang } 2201dcd589f8SShri Abhyankar 22029a2535b5SHong Zhang /* Set MUMPS options from the options database */ 220326cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2204dcd589f8SShri Abhyankar 22059566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 22069566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2207dcd589f8SShri Abhyankar 220867877ebaSShri Abhyankar /* analysis phase */ 2209a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2210a5e57a09SHong Zhang mumps->id.n = M; 2211a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 221267877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2213a5e57a09SHong Zhang if (!mumps->myid) { 2214a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2215a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2216a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2217a6053eceSJunchao Zhang if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 22184ac6704cSBarry Smith if (r) { 22194ac6704cSBarry Smith mumps->id.ICNTL(7) = 1; 2220a5e57a09SHong Zhang if (!mumps->myid) { 2221e0b74bf9SHong Zhang const PetscInt *idx; 2222a6053eceSJunchao Zhang PetscInt i; 
22232205254eSKarl Rupp 22249566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 22259566063dSJacob Faibussowitsch PetscCall(ISGetIndices(r, &idx)); 22269566063dSJacob Faibussowitsch for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 22279566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(r, &idx)); 2228e0b74bf9SHong Zhang } 2229e0b74bf9SHong Zhang } 223067877ebaSShri Abhyankar } 223167877ebaSShri Abhyankar break; 223267877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2233a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2234a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2235a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2236a6053eceSJunchao Zhang if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 223725aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 22389566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 22399566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 22409566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 224125aac85cSJunchao Zhang } 224267877ebaSShri Abhyankar break; 224367877ebaSShri Abhyankar } 22443ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 22459566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 224667877ebaSShri Abhyankar 2247719d5645SBarry Smith F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2248dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 224951d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 22504e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 2251eb3ef3b2SHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2252b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2253d47f36abSHong Zhang 
2254d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 22553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2256b24902e0SBarry Smith } 2257b24902e0SBarry Smith 2258450b117fSShri Abhyankar /* Note the Petsc r and c permutations are ignored */ 2259d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2260d71ae5a4SJacob Faibussowitsch { 2261e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 226267877ebaSShri Abhyankar Vec b; 226367877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2264450b117fSShri Abhyankar 2265450b117fSShri Abhyankar PetscFunctionBegin; 2266d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2267338d3105SPierre Jolivet /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 22683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2269d47f36abSHong Zhang } 2270dcd589f8SShri Abhyankar 22719a2535b5SHong Zhang /* Set MUMPS options from the options database */ 227226cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2273dcd589f8SShri Abhyankar 22749566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 22759566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 227667877ebaSShri Abhyankar 227767877ebaSShri Abhyankar /* analysis phase */ 2278a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2279a5e57a09SHong Zhang mumps->id.n = M; 2280a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 228167877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2282a5e57a09SHong Zhang if (!mumps->myid) { 2283a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2284a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2285a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2286ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 228767877ebaSShri Abhyankar } 
228867877ebaSShri Abhyankar break; 228967877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2290a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2291a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2292a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2293ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 229425aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 22959566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 22969566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 22979566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 229825aac85cSJunchao Zhang } 229967877ebaSShri Abhyankar break; 230067877ebaSShri Abhyankar } 23013ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 23029566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 230367877ebaSShri Abhyankar 2304450b117fSShri Abhyankar F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2305dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 230651d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2307b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2308d47f36abSHong Zhang 2309d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 23103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2311450b117fSShri Abhyankar } 2312b24902e0SBarry Smith 2313141f4205SHong Zhang /* Note the Petsc r permutation and factor info are ignored */ 2314d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2315d71ae5a4SJacob Faibussowitsch { 2316e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 231767877ebaSShri Abhyankar Vec b; 231867877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2319397b6df1SKris Buschelman 
2320397b6df1SKris Buschelman PetscFunctionBegin; 2321d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2322338d3105SPierre Jolivet /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 23233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2324d47f36abSHong Zhang } 2325dcd589f8SShri Abhyankar 23269a2535b5SHong Zhang /* Set MUMPS options from the options database */ 232726cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2328dcd589f8SShri Abhyankar 23299566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 23309566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2331dcd589f8SShri Abhyankar 233267877ebaSShri Abhyankar /* analysis phase */ 2333a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2334a5e57a09SHong Zhang mumps->id.n = M; 2335a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 233667877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2337a5e57a09SHong Zhang if (!mumps->myid) { 2338a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2339a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2340a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2341ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 234267877ebaSShri Abhyankar } 234367877ebaSShri Abhyankar break; 234467877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2345a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2346a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2347a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2348ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 234925aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 23509566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 23519566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 23529566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 235325aac85cSJunchao Zhang } 235467877ebaSShri Abhyankar break; 235567877ebaSShri Abhyankar } 23563ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 23579566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 23585cd7cf9dSHong Zhang 23592792810eSHong Zhang F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2360dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 236151d5961aSHong Zhang F->ops->solvetranspose = MatSolve_MUMPS; 23624e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 236323a5080aSHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2364b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 23654e34a73bSHong Zhang #if defined(PETSC_USE_COMPLEX) 23660298fd71SBarry Smith F->ops->getinertia = NULL; 23674e34a73bSHong Zhang #else 23684e34a73bSHong Zhang F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2369db4efbfdSBarry Smith #endif 2370d47f36abSHong Zhang 2371d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 23723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2373b24902e0SBarry Smith } 2374b24902e0SBarry Smith 2375d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2376d71ae5a4SJacob Faibussowitsch { 237764e6c443SBarry Smith PetscBool iascii; 237864e6c443SBarry Smith PetscViewerFormat format; 2379e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2380f6c57405SHong Zhang 2381f6c57405SHong Zhang PetscFunctionBegin; 238264e6c443SBarry Smith /* check if matrix is mumps type */ 23833ba16761SJacob Faibussowitsch if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 238464e6c443SBarry Smith 
23859566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 238664e6c443SBarry Smith if (iascii) { 23879566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 23881511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 23899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 23901511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 23919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 23929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 23939566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 23949566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 23959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 23969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 23979566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 23989566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 23999566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 24009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 24019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): 
%d\n", mumps->id.ICNTL(10))); 24029566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2403a5e57a09SHong Zhang if (mumps->id.ICNTL(11) > 0) { 24049566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 24059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 24069566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 24079566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 24089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 24099566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2410f6c57405SHong Zhang } 24119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 24129566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 24139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 241445e3843bSPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2415f6c57405SHong Zhang /* ICNTL(15-17) not used */ 24169566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 24179566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 24189566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 24199566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 24209566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 24219566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2422c0165424SHong Zhang 24239566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 24249566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 24259566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 24269566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 24279566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 24289566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 242942179a6aSHong Zhang 24309566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 24319566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 24329566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 24339566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 24349566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 24359566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2436f6c57405SHong Zhang 24379566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 24389566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 24399566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 24409566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 24419566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 24429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2443f6c57405SHong Zhang 2444a5b23f4aSJose E. 
Roman /* information local to each processor */ 24459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 24469566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 24479566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 24489566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 24499566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 24509566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 24519566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 24529566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 24539566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 24549566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2455f6c57405SHong Zhang 24569566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 24579566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 24589566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2459f6c57405SHong Zhang 24609566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 24619566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 24629566063dSJacob Faibussowitsch 
PetscCall(PetscViewerFlush(viewer)); 2463f6c57405SHong Zhang 24649566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 24659566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 24669566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2467b34f08ffSHong Zhang 2468a0e18203SThibaut Appel if (mumps->ninfo && mumps->ninfo <= 80) { 2469b34f08ffSHong Zhang PetscInt i; 2470b34f08ffSHong Zhang for (i = 0; i < mumps->ninfo; i++) { 24719566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 24729566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 24739566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2474b34f08ffSHong Zhang } 2475b34f08ffSHong Zhang } 24769566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 24771511cd71SPierre Jolivet } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2478f6c57405SHong Zhang 24791511cd71SPierre Jolivet if (mumps->myid == 0) { /* information from the host */ 24809566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 24819566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 24829566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 24839566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2484f6c57405SHong Zhang 24859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 24869566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 24879566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 24889566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 24899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 24909566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 24919566063dSJacob 
Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 24929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 24939566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 24949566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 24959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 24969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 24979566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 24989566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 24999566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 25009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 25019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS 
internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 25029566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 25039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 25049566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 25059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 25069566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 25079566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 25089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 25099566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 25109566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 25119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 
25129566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 25139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 25149566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 25159566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 25169566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 25179566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 25189566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2519f6c57405SHong Zhang } 2520f6c57405SHong Zhang } 2521cb828f0fSHong Zhang } 25223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2523f6c57405SHong Zhang }

/*
  MatGetInfo_MUMPS - Fills a MatInfo record for a MUMPS factor.

  Only the nonzero counts carry information: nz_allocated and nz_used are both derived from
  INFOG(20); every other field is set to a constant. The flag argument is not consulted.

  A negative INFOG(20) is interpreted as a count expressed in millions, so its absolute value is
  scaled by 1e6. The scaling is done in floating point on purpose: MUMPS integers are 32-bit here,
  and the negative encoding is used precisely for counts that do not fit in 32 bits, so an integer
  product 1000000 * |INFOG(20)| would overflow.
*/
PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MUMPS          *mumps   = (Mat_MUMPS *)A->data;
  const PetscMUMPSInt infog20 = mumps->id.INFOG(20);
  PetscLogDouble      nz;

  PetscFunctionBegin;
  nz = infog20 >= 0 ? (PetscLogDouble)infog20 : -1000000.0 * (PetscLogDouble)infog20;

  info->block_size        = 1.0;
  info->nz_allocated      = nz;
  info->nz_used           = nz;
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatFactorSetSchurIS_MUMPS - Registers the index set whose variables form the Schur complement.

  Replaces F->schur with a new sequential dense matrix of order equal to the local size of `is`
  and hands the array of that matrix to MUMPS as id.schur, so MUMPS works directly in the storage
  of F->schur. The indices of `is` are stored, shifted to 1-based numbering, in id.listvar_schur.
  Finally ICNTL(26) is set to -1, a marker used by PETSc in the solve phase.
*/
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  mumps->id.schur = (MumpsScalar *)arr;
  /* range-checked conversions: PetscInt may be wider than the MUMPS integer type */
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

  /* MUMPS expects Fortran style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatFactorCreateSchurComplement_MUMPS - Returns a new dense matrix holding a copy of the Schur complement.

  The copy is always a full size_schur x size_schur matrix. How id.schur is read depends on
  mumps->sym and ICNTL(19):
    sym == 0, ICNTL(19) == 1 : id.schur is read row by row and transposed into the result
    sym == 0, otherwise      : id.schur is copied as is
    sym != 0, ICNTL(19) == 2 : lower triangle read by columns and mirrored
    sym != 0, ICNTL(19) == 3 : id.schur is copied as is
    sym != 0, otherwise      : lower triangle read by rows and mirrored

  Errors with PETSC_ERR_ORDER when ICNTL(19) is zero, i.e. Schur mode was never selected.
  On success *S is set to the new matrix.
*/
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;
  PetscInt     i, j, N;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
#endif

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  N = mumps->id.size_schur; /* kept as PetscInt so that N * N below is not evaluated in the MUMPS integer type */
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, N, N));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
#endif
          array[j * N + i] = val;
        }
      }
    } else { /* stored by columns */
      PetscCall(PetscArraycpy(array, mumps->id.schur, N * N));
    }
  } else {                          /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = val;
          array[j * N + i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(PetscArraycpy(array, mumps->id.schur, N * N));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = val;
          array[j * N + i] = val;
        }
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMumpsSetIcntl_MUMPS - Stores ICNTL(icntl) = ival.

  While id.job is still JOB_NULL the pair is kept in the cache mumps->ICNTL_pre, laid out as
  [count, index_0, value_0, index_1, value_1, ...]; an index that is already cached has its value
  overwritten. Otherwise the value is written straight into id.ICNTL(icntl).

  ival is converted (with range check) before the cache is grown, so a failed conversion leaves
  the cache untouched.
*/
PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
  PetscMUMPSInt mval;

  PetscFunctionBegin;
  PetscCall(PetscMUMPSIntCast(ival, &mval));
  if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) {                             /* not already cached */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
    mumps->ICNTL_pre[2 + 2 * i] = mval;
  } else mumps->id.ICNTL(icntl) = mval;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMumpsGetIcntl_MUMPS - Reads ICNTL(icntl).

  While id.job is still JOB_NULL the value is looked up in the cache mumps->ICNTL_pre and 0 is
  returned when the index has not been cached; otherwise id.ICNTL(icntl) is returned.
*/
PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
    *ival = 0;
    for (i = 0; i < nICNTL_pre; ++i) {
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
    }
  } else *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL()

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
. icntl - index of MUMPS parameter array ICNTL(), between 1 and 38
- ival  - value of MUMPS ICNTL(icntl)

  Options Database Key:
. -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveInt(F, ival, 3);
  PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL()

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array ICNTL(), between 1 and 38

  Output Parameter:
. ival - value of MUMPS ICNTL(icntl)

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidIntPointer(ival, 3);
  PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMumpsSetCntl_MUMPS - Stores CNTL(icntl) = val.

  While id.job is still JOB_NULL the pair is kept in the cache mumps->CNTL_pre, laid out as
  [count, index_0, value_0, index_1, value_1, ...] with every entry (count and indices included)
  stored as PetscReal; an index that is already cached has its value overwritten. Otherwise the
  value is written straight into id.CNTL(icntl).
*/
PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    for (i = 0; i < nCNTL_pre; ++i)
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
    if (i == nCNTL_pre) {
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
      mumps->CNTL_pre[0]++;
    }
    mumps->CNTL_pre[1 + 2 * i] = icntl;
    mumps->CNTL_pre[2 + 2 * i] = val;
  } else mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMumpsGetCntl_MUMPS - Reads CNTL(icntl).

  While id.job is still JOB_NULL the value is looked up in the cache mumps->CNTL_pre and 0.0 is
  returned when the index has not been cached; otherwise id.CNTL(icntl) is returned.
*/
PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    *val = 0.0;
    for (i = 0; i < nCNTL_pre; ++i) {
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
    }
  } else *val = mumps->id.CNTL(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsSetCntl - Set MUMPS parameter CNTL()

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
. icntl - index of MUMPS parameter array CNTL(), between 1 and 7
- val   - value of MUMPS CNTL(icntl)

  Options Database Key:
. -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveReal(F, val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetCntl - Get MUMPS parameter CNTL()

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array CNTL(), between 1 and 7

  Output Parameter:
. val - value of MUMPS CNTL(icntl)

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidRealPointer(val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMumpsGetInfo_MUMPS - Copies id.INFO(icntl) into *info.

  The index is used as given; no range check is performed here.
*/
PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

2846d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2847d71ae5a4SJacob Faibussowitsch { 2848e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2849bc6112feSHong Zhang 2850bc6112feSHong Zhang PetscFunctionBegin; 2851bc6112feSHong Zhang *infog = mumps->id.INFOG(icntl); 
28523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2853bc6112feSHong Zhang } 2854bc6112feSHong Zhang 2855d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2856d71ae5a4SJacob Faibussowitsch { 2857e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2858bc6112feSHong Zhang 2859bc6112feSHong Zhang PetscFunctionBegin; 2860bc6112feSHong Zhang *rinfo = mumps->id.RINFO(icntl); 28613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2862bc6112feSHong Zhang } 2863bc6112feSHong Zhang 2864d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2865d71ae5a4SJacob Faibussowitsch { 2866e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2867bc6112feSHong Zhang 2868bc6112feSHong Zhang PetscFunctionBegin; 2869bc6112feSHong Zhang *rinfog = mumps->id.RINFOG(icntl); 28703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2871bc6112feSHong Zhang } 2872bc6112feSHong Zhang 28735c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 28745c0bae8cSAshish Patel { 28755c0bae8cSAshish Patel Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 28765c0bae8cSAshish Patel 28775c0bae8cSAshish Patel PetscFunctionBegin; 28785c0bae8cSAshish Patel PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 28795c0bae8cSAshish Patel *size = 0; 28805c0bae8cSAshish Patel *array = NULL; 28815c0bae8cSAshish Patel if (!mumps->myid) { 28825c0bae8cSAshish Patel *size = mumps->id.INFOG(28); 28835c0bae8cSAshish Patel PetscCall(PetscMalloc1(*size, array)); 28845c0bae8cSAshish Patel for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 28855c0bae8cSAshish Patel } 28865c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 28875c0bae8cSAshish Patel } 28885c0bae8cSAshish Patel 
2889d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2890d71ae5a4SJacob Faibussowitsch { 28910e6b8875SHong Zhang Mat Bt = NULL, Btseq = NULL; 28920e6b8875SHong Zhang PetscBool flg; 2893bb599dfdSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2894bb599dfdSHong Zhang PetscScalar *aa; 2895f410b75aSHong Zhang PetscInt spnr, *ia, *ja, M, nrhs; 2896bb599dfdSHong Zhang 2897bb599dfdSHong Zhang PetscFunctionBegin; 2898064a246eSJacob Faibussowitsch PetscValidPointer(spRHS, 2); 2899013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 29000e6b8875SHong Zhang if (flg) { 29019566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2902013e2dc7SBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2903bb599dfdSHong Zhang 29049566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2905bb599dfdSHong Zhang 29062d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 29070e6b8875SHong Zhang Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 29080e6b8875SHong Zhang Btseq = b->A; 29090e6b8875SHong Zhang } else { 29100e6b8875SHong Zhang Btseq = Bt; 29110e6b8875SHong Zhang } 29120e6b8875SHong Zhang 29139566063dSJacob Faibussowitsch PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2914f410b75aSHong Zhang mumps->id.nrhs = nrhs; 2915f410b75aSHong Zhang mumps->id.lrhs = M; 2916f410b75aSHong Zhang mumps->id.rhs = NULL; 2917f410b75aSHong Zhang 2918e3f2db6aSHong Zhang if (!mumps->myid) { 29199566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 29209566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 292128b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 29229566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, 
&mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2923bb599dfdSHong Zhang mumps->id.rhs_sparse = (MumpsScalar *)aa; 2924e3f2db6aSHong Zhang } else { 2925e3f2db6aSHong Zhang mumps->id.irhs_ptr = NULL; 2926e3f2db6aSHong Zhang mumps->id.irhs_sparse = NULL; 2927e3f2db6aSHong Zhang mumps->id.nz_rhs = 0; 2928e3f2db6aSHong Zhang mumps->id.rhs_sparse = NULL; 2929e3f2db6aSHong Zhang } 2930bb599dfdSHong Zhang mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2931e3f2db6aSHong Zhang mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2932bb599dfdSHong Zhang 2933bb599dfdSHong Zhang /* solve phase */ 2934bb599dfdSHong Zhang mumps->id.job = JOB_SOLVE; 29353ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 2936049d1499SBarry Smith PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 293714267174SHong Zhang 2938e3f2db6aSHong Zhang if (!mumps->myid) { 29399566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 29409566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 294128b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2942e3f2db6aSHong Zhang } 29433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2944bb599dfdSHong Zhang } 2945bb599dfdSHong Zhang 2946bb599dfdSHong Zhang /*@ 29472ef1f0ffSBarry Smith MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2948bb599dfdSHong Zhang 2949c3339decSBarry Smith Logically Collective 2950bb599dfdSHong Zhang 295120f4b53cSBarry Smith Input Parameter: 295220f4b53cSBarry Smith . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2953bb599dfdSHong Zhang 2954bb599dfdSHong Zhang Output Parameter: 295520f4b53cSBarry Smith . 
spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2956bb599dfdSHong Zhang 2957bb599dfdSHong Zhang Level: beginner 2958bb599dfdSHong Zhang 2959bb599dfdSHong Zhang References: 2960606c0280SSatish Balay . * - MUMPS Users' Guide 2961bb599dfdSHong Zhang 29621cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2963bb599dfdSHong Zhang @*/ 2964d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2965d71ae5a4SJacob Faibussowitsch { 2966bb599dfdSHong Zhang PetscFunctionBegin; 2967bb599dfdSHong Zhang PetscValidType(F, 1); 296828b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2969cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 29703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2971bb599dfdSHong Zhang } 2972bb599dfdSHong Zhang 2973d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2974d71ae5a4SJacob Faibussowitsch { 29750e6b8875SHong Zhang Mat spRHS; 29760e6b8875SHong Zhang 29770e6b8875SHong Zhang PetscFunctionBegin; 29789566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(spRHST, &spRHS)); 29799566063dSJacob Faibussowitsch PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 29809566063dSJacob Faibussowitsch PetscCall(MatDestroy(&spRHS)); 29813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 29820e6b8875SHong Zhang } 29830e6b8875SHong Zhang 29840e6b8875SHong Zhang /*@ 29852ef1f0ffSBarry Smith MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 29860e6b8875SHong Zhang 2987c3339decSBarry Smith Logically Collective 29880e6b8875SHong Zhang 298920f4b53cSBarry Smith Input Parameter: 299020f4b53cSBarry Smith . 
F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 29910e6b8875SHong Zhang 29920e6b8875SHong Zhang Output Parameter: 299320f4b53cSBarry Smith . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 29940e6b8875SHong Zhang 29950e6b8875SHong Zhang Level: beginner 29960e6b8875SHong Zhang 29970e6b8875SHong Zhang References: 2998606c0280SSatish Balay . * - MUMPS Users' Guide 29990e6b8875SHong Zhang 30001cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 30010e6b8875SHong Zhang @*/ 3002d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3003d71ae5a4SJacob Faibussowitsch { 30040e6b8875SHong Zhang PetscBool flg; 30050e6b8875SHong Zhang 30060e6b8875SHong Zhang PetscFunctionBegin; 30070e6b8875SHong Zhang PetscValidType(F, 1); 300828b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 30099566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 301028b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 30110e6b8875SHong Zhang 3012cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 30133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30140e6b8875SHong Zhang } 30150e6b8875SHong Zhang 3016a21f80fcSHong Zhang /*@ 3017a21f80fcSHong Zhang MatMumpsGetInfo - Get MUMPS parameter INFO() 3018a21f80fcSHong Zhang 3019c3339decSBarry Smith Logically Collective 3020a21f80fcSHong Zhang 3021a21f80fcSHong Zhang Input Parameters: 302211a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3023a21f80fcSHong Zhang - icntl - index of MUMPS 
parameter array INFO() 3024a21f80fcSHong Zhang 3025a21f80fcSHong Zhang Output Parameter: 3026a21f80fcSHong Zhang . ival - value of MUMPS INFO(icntl) 3027a21f80fcSHong Zhang 3028a21f80fcSHong Zhang Level: beginner 3029a21f80fcSHong Zhang 303096a0c994SBarry Smith References: 3031606c0280SSatish Balay . * - MUMPS Users' Guide 3032a21f80fcSHong Zhang 30331cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3034a21f80fcSHong Zhang @*/ 3035d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3036d71ae5a4SJacob Faibussowitsch { 3037bc6112feSHong Zhang PetscFunctionBegin; 30382989dfd4SHong Zhang PetscValidType(F, 1); 303928b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3040ca810319SHong Zhang PetscValidIntPointer(ival, 3); 3041cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 30423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3043bc6112feSHong Zhang } 3044bc6112feSHong Zhang 3045a21f80fcSHong Zhang /*@ 3046a21f80fcSHong Zhang MatMumpsGetInfog - Get MUMPS parameter INFOG() 3047a21f80fcSHong Zhang 3048c3339decSBarry Smith Logically Collective 3049a21f80fcSHong Zhang 3050a21f80fcSHong Zhang Input Parameters: 305111a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3052a21f80fcSHong Zhang - icntl - index of MUMPS parameter array INFOG() 3053a21f80fcSHong Zhang 3054a21f80fcSHong Zhang Output Parameter: 3055a21f80fcSHong Zhang . ival - value of MUMPS INFOG(icntl) 3056a21f80fcSHong Zhang 3057a21f80fcSHong Zhang Level: beginner 3058a21f80fcSHong Zhang 305996a0c994SBarry Smith References: 3060606c0280SSatish Balay . 
* - MUMPS Users' Guide 3061a21f80fcSHong Zhang 30621cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3063a21f80fcSHong Zhang @*/ 3064d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3065d71ae5a4SJacob Faibussowitsch { 3066bc6112feSHong Zhang PetscFunctionBegin; 30672989dfd4SHong Zhang PetscValidType(F, 1); 306828b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3069ca810319SHong Zhang PetscValidIntPointer(ival, 3); 3070cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 30713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3072bc6112feSHong Zhang } 3073bc6112feSHong Zhang 3074a21f80fcSHong Zhang /*@ 3075a21f80fcSHong Zhang MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3076a21f80fcSHong Zhang 3077c3339decSBarry Smith Logically Collective 3078a21f80fcSHong Zhang 3079a21f80fcSHong Zhang Input Parameters: 308011a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3081a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFO() 3082a21f80fcSHong Zhang 3083a21f80fcSHong Zhang Output Parameter: 3084a21f80fcSHong Zhang . val - value of MUMPS RINFO(icntl) 3085a21f80fcSHong Zhang 3086a21f80fcSHong Zhang Level: beginner 3087a21f80fcSHong Zhang 308896a0c994SBarry Smith References: 3089606c0280SSatish Balay . 
* - MUMPS Users' Guide 3090a21f80fcSHong Zhang 30911cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3092a21f80fcSHong Zhang @*/ 3093d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3094d71ae5a4SJacob Faibussowitsch { 3095bc6112feSHong Zhang PetscFunctionBegin; 30962989dfd4SHong Zhang PetscValidType(F, 1); 309728b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3098bc6112feSHong Zhang PetscValidRealPointer(val, 3); 3099cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 31003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3101bc6112feSHong Zhang } 3102bc6112feSHong Zhang 3103a21f80fcSHong Zhang /*@ 3104a21f80fcSHong Zhang MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3105a21f80fcSHong Zhang 3106c3339decSBarry Smith Logically Collective 3107a21f80fcSHong Zhang 3108a21f80fcSHong Zhang Input Parameters: 310911a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3110a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFOG() 3111a21f80fcSHong Zhang 3112a21f80fcSHong Zhang Output Parameter: 3113a21f80fcSHong Zhang . val - value of MUMPS RINFOG(icntl) 3114a21f80fcSHong Zhang 3115a21f80fcSHong Zhang Level: beginner 3116a21f80fcSHong Zhang 311796a0c994SBarry Smith References: 3118606c0280SSatish Balay . 
* - MUMPS Users' Guide 3119a21f80fcSHong Zhang 31201cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3121a21f80fcSHong Zhang @*/ 3122d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3123d71ae5a4SJacob Faibussowitsch { 3124bc6112feSHong Zhang PetscFunctionBegin; 31252989dfd4SHong Zhang PetscValidType(F, 1); 312628b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3127bc6112feSHong Zhang PetscValidRealPointer(val, 3); 3128cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 31293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3130bc6112feSHong Zhang } 3131bc6112feSHong Zhang 31325c0bae8cSAshish Patel /*@ 31335c0bae8cSAshish Patel MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 31345c0bae8cSAshish Patel 31355c0bae8cSAshish Patel Logically Collective 31365c0bae8cSAshish Patel 31375c0bae8cSAshish Patel Input Parameter: 31385c0bae8cSAshish Patel . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 31395c0bae8cSAshish Patel 31405c0bae8cSAshish Patel Output Parameters: 31415c0bae8cSAshish Patel + size - local size of the array. The size of the array is non-zero only on the host. 31425c0bae8cSAshish Patel - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 31435c0bae8cSAshish Patel for freeing this array. 31445c0bae8cSAshish Patel 31455c0bae8cSAshish Patel Level: beginner 31465c0bae8cSAshish Patel 31475c0bae8cSAshish Patel References: 31485c0bae8cSAshish Patel . 
* - MUMPS Users' Guide 31495c0bae8cSAshish Patel 31501cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 31515c0bae8cSAshish Patel @*/ 31525c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 31535c0bae8cSAshish Patel { 31545c0bae8cSAshish Patel PetscFunctionBegin; 31555c0bae8cSAshish Patel PetscValidType(F, 1); 31565c0bae8cSAshish Patel PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3157*0d6f747bSJacob Faibussowitsch PetscValidIntPointer(size, 2); 3158*0d6f747bSJacob Faibussowitsch PetscValidPointer(array, 3); 31595c0bae8cSAshish Patel PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 31605c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 31615c0bae8cSAshish Patel } 31625c0bae8cSAshish Patel 316324b6179bSKris Buschelman /*MC 31642692d6eeSBarry Smith MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 316524b6179bSKris Buschelman distributed and sequential matrices via the external package MUMPS. 316624b6179bSKris Buschelman 316711a5261eSBarry Smith Works with `MATAIJ` and `MATSBAIJ` matrices 316824b6179bSKris Buschelman 3169c2b89b5dSBarry Smith Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3170c2b89b5dSBarry Smith 31712ef1f0ffSBarry Smith Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 31722ef1f0ffSBarry Smith See details below. 
3173217d3b1eSJunchao Zhang 31742ef1f0ffSBarry Smith Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3175c2b89b5dSBarry Smith 317624b6179bSKris Buschelman Options Database Keys: 31774422a9fcSPatrick Sanan + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 31784422a9fcSPatrick Sanan . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 31794422a9fcSPatrick Sanan . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 31804422a9fcSPatrick Sanan . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 31814422a9fcSPatrick Sanan . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3182b53c1a7fSBarry Smith . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3183b53c1a7fSBarry Smith Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 31844422a9fcSPatrick Sanan . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 31854422a9fcSPatrick Sanan . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 31864422a9fcSPatrick Sanan . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 31874422a9fcSPatrick Sanan . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 31884422a9fcSPatrick Sanan . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 31894422a9fcSPatrick Sanan . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 319045e3843bSPierre Jolivet . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 31914422a9fcSPatrick Sanan . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 319225aac85cSJunchao Zhang . 
-mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 31934422a9fcSPatrick Sanan . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 31944422a9fcSPatrick Sanan . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor 31954422a9fcSPatrick Sanan . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 31964422a9fcSPatrick Sanan . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 31974422a9fcSPatrick Sanan . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 31984422a9fcSPatrick Sanan . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 31994422a9fcSPatrick Sanan . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 32004422a9fcSPatrick Sanan . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 32014422a9fcSPatrick Sanan . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 32024422a9fcSPatrick Sanan . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3203a0e18203SThibaut Appel . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3204a0e18203SThibaut Appel . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3205a0e18203SThibaut Appel . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 32064422a9fcSPatrick Sanan . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 32074422a9fcSPatrick Sanan . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 32084422a9fcSPatrick Sanan . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 32094422a9fcSPatrick Sanan . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3210217d3b1eSJunchao Zhang . 
-mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3211a0e18203SThibaut Appel . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3212217d3b1eSJunchao Zhang - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3213217d3b1eSJunchao Zhang Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 321424b6179bSKris Buschelman 321524b6179bSKris Buschelman Level: beginner 321624b6179bSKris Buschelman 321795452b02SPatrick Sanan Notes: 32182ef1f0ffSBarry Smith MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 32192ef1f0ffSBarry Smith error if the matrix is Hermitian. 322038548759SBarry Smith 322126cc229bSBarry Smith When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 322226cc229bSBarry Smith `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 322326cc229bSBarry Smith 32242ef1f0ffSBarry Smith When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 32252ef1f0ffSBarry Smith the failure with 32262ef1f0ffSBarry Smith .vb 32272ef1f0ffSBarry Smith KSPGetPC(ksp,&pc); 32282ef1f0ffSBarry Smith PCFactorGetMatrix(pc,&mat); 32292ef1f0ffSBarry Smith MatMumpsGetInfo(mat,....); 32302ef1f0ffSBarry Smith MatMumpsGetInfog(mat,....); etc. 32312ef1f0ffSBarry Smith .ve 32322ef1f0ffSBarry Smith Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 
32339fc87aa7SBarry Smith 3234a5399872SJunchao Zhang MUMPS provides 64-bit integer support in two build modes: 3235a5399872SJunchao Zhang full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3236a5399872SJunchao Zhang requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 32378fcaa860SBarry Smith 3238a5399872SJunchao Zhang selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3239a5399872SJunchao Zhang MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and 3240a5399872SJunchao Zhang columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3241a5399872SJunchao Zhang integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3242a5399872SJunchao Zhang 3243a5399872SJunchao Zhang With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3244a5399872SJunchao Zhang 3245a5399872SJunchao Zhang Two modes to run MUMPS/PETSc with OpenMP 32462ef1f0ffSBarry Smith .vb 32472ef1f0ffSBarry Smith Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 32482ef1f0ffSBarry Smith threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 32492ef1f0ffSBarry Smith .ve 32508fcaa860SBarry Smith 32512ef1f0ffSBarry Smith .vb 32522ef1f0ffSBarry Smith -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. 
For example, 32532ef1f0ffSBarry Smith if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 32542ef1f0ffSBarry Smith .ve 32558fcaa860SBarry Smith 32568fcaa860SBarry Smith To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 32572ef1f0ffSBarry Smith (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 32582ef1f0ffSBarry Smith (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 32598fcaa860SBarry Smith libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 32608fcaa860SBarry Smith (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 3261217d3b1eSJunchao Zhang 32628fcaa860SBarry Smith If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3263217d3b1eSJunchao Zhang processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3264217d3b1eSJunchao Zhang size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3265217d3b1eSJunchao Zhang are called slave ranks (or slaves). Only master ranks are seen by MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 3266217d3b1eSJunchao Zhang by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3267217d3b1eSJunchao Zhang In a multi-socket compute node, MPI rank mapping is an issue. 
Still use the above example and suppose your compute node has two sockets, 3268217d3b1eSJunchao Zhang if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3269217d3b1eSJunchao Zhang MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3270217d3b1eSJunchao Zhang cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3271217d3b1eSJunchao Zhang problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 32728fcaa860SBarry Smith For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3273217d3b1eSJunchao Zhang examine the mapping result. 3274217d3b1eSJunchao Zhang 327511a5261eSBarry Smith PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 327611a5261eSBarry Smith for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 327711a5261eSBarry Smith calls `omp_set_num_threads`(m) internally before calling MUMPS. 3278217d3b1eSJunchao Zhang 3279217d3b1eSJunchao Zhang References: 3280606c0280SSatish Balay + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3281606c0280SSatish Balay - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 
3282217d3b1eSJunchao Zhang 32831cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 328424b6179bSKris Buschelman M*/ 328524b6179bSKris Buschelman 3286d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3287d71ae5a4SJacob Faibussowitsch { 328835bd34faSBarry Smith PetscFunctionBegin; 32892692d6eeSBarry Smith *type = MATSOLVERMUMPS; 32903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 329135bd34faSBarry Smith } 329235bd34faSBarry Smith 3293bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI AIJ matrices */ 3294d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3295d71ae5a4SJacob Faibussowitsch { 32962877fffaSHong Zhang Mat B; 32972877fffaSHong Zhang Mat_MUMPS *mumps; 3298ace3abfcSBarry Smith PetscBool isSeqAIJ; 32992c7c0729SBarry Smith PetscMPIInt size; 33002877fffaSHong Zhang 33012877fffaSHong Zhang PetscFunctionBegin; 3302eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 330303e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 330403e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 330503e5aca4SStefano Zampini *F = NULL; 330603e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 330703e5aca4SStefano Zampini } 3308eb1ec7c1SStefano Zampini #endif 33092877fffaSHong Zhang /* Create the factorization matrix */ 33109566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 33119566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 33129566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 33139566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 33149566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 33152877fffaSHong Zhang 33164dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 33172205254eSKarl Rupp 33182877fffaSHong Zhang B->ops->view = MatView_MUMPS; 331935bd34faSBarry Smith B->ops->getinfo = MatGetInfo_MUMPS; 33202205254eSKarl Rupp 33219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 33229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 33239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 33249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 33259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 33269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 33279566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 33289566063dSJacob 
Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 33299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 33309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 33319566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 33325c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 33339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 33349566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 33356444a565SStefano Zampini 3336450b117fSShri Abhyankar if (ftype == MAT_FACTOR_LU) { 3337450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3338d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 3339bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3340bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 33419566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3342746480a1SHong Zhang mumps->sym = 0; 3343dcd589f8SShri Abhyankar } else { 334467877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3345450b117fSShri Abhyankar B->factortype = MAT_FACTOR_CHOLESKY; 3346bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3347bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 33489566063dSJacob Faibussowitsch 
PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 334959ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 335059ac8732SStefano Zampini mumps->sym = 2; 335159ac8732SStefano Zampini #else 3352b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 33536fdc2a6dSBarry Smith else mumps->sym = 2; 335459ac8732SStefano Zampini #endif 3355450b117fSShri Abhyankar } 33562877fffaSHong Zhang 335700c67f3bSHong Zhang /* set solvertype */ 33589566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 33599566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 33609566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 33612c7c0729SBarry Smith if (size == 1) { 33624ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3363f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 33642c7c0729SBarry Smith } 33652877fffaSHong Zhang B->ops->destroy = MatDestroy_MUMPS; 3366e69c285eSBarry Smith B->data = (void *)mumps; 33672205254eSKarl Rupp 33682877fffaSHong Zhang *F = B; 3369413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3370413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3371413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3372d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 33733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 33742877fffaSHong Zhang } 33752877fffaSHong Zhang 3376bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3377d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3378d71ae5a4SJacob Faibussowitsch { 33792877fffaSHong Zhang Mat B; 33802877fffaSHong Zhang Mat_MUMPS *mumps; 3381ace3abfcSBarry Smith PetscBool isSeqSBAIJ; 33822c7c0729SBarry Smith PetscMPIInt size; 33832877fffaSHong Zhang 33842877fffaSHong Zhang 
PetscFunctionBegin; 3385eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 338603e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 338703e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 338803e5aca4SStefano Zampini *F = NULL; 338903e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 339003e5aca4SStefano Zampini } 3391eb1ec7c1SStefano Zampini #endif 33929566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 33939566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 33949566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 33959566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3396e69c285eSBarry Smith 33974dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 33989566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3399bccb9932SShri Abhyankar if (isSeqSBAIJ) { 340016ebf90aSShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3401dcd589f8SShri Abhyankar } else { 3402bccb9932SShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3403bccb9932SShri Abhyankar } 3404bccb9932SShri Abhyankar 340567877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3406bccb9932SShri Abhyankar B->ops->view = MatView_MUMPS; 3407722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 34082205254eSKarl Rupp 34099566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 34109566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 34119566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 34129566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 34139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 34149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 34159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 34169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 34179566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 34189566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 34199566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 34205c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 34219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 34229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 34232205254eSKarl Rupp 3424f4762488SHong Zhang B->factortype = MAT_FACTOR_CHOLESKY; 342559ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 342659ac8732SStefano Zampini mumps->sym = 2; 342759ac8732SStefano Zampini #else 3428b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 34296fdc2a6dSBarry Smith else mumps->sym = 2; 
343059ac8732SStefano Zampini #endif 3431a214ac2aSShri Abhyankar 343200c67f3bSHong Zhang /* set solvertype */ 34339566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 34349566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 34359566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 34362c7c0729SBarry Smith if (size == 1) { 34374ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3438f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 34392c7c0729SBarry Smith } 34409566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3441f3c0ef26SHong Zhang B->ops->destroy = MatDestroy_MUMPS; 3442e69c285eSBarry Smith B->data = (void *)mumps; 34432205254eSKarl Rupp 34442877fffaSHong Zhang *F = B; 3445413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3446413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3447413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3448d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 34493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34502877fffaSHong Zhang } 345197969023SHong Zhang 3452d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3453d71ae5a4SJacob Faibussowitsch { 345467877ebaSShri Abhyankar Mat B; 345567877ebaSShri Abhyankar Mat_MUMPS *mumps; 3456ace3abfcSBarry Smith PetscBool isSeqBAIJ; 34572c7c0729SBarry Smith PetscMPIInt size; 345867877ebaSShri Abhyankar 345967877ebaSShri Abhyankar PetscFunctionBegin; 346067877ebaSShri Abhyankar /* Create the factorization matrix */ 34619566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 34629566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 34639566063dSJacob Faibussowitsch 
PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 34649566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 34659566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3466450b117fSShri Abhyankar 34674dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 3468450b117fSShri Abhyankar if (ftype == MAT_FACTOR_LU) { 3469450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3470450b117fSShri Abhyankar B->factortype = MAT_FACTOR_LU; 3471bccb9932SShri Abhyankar if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3472bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3473746480a1SHong Zhang mumps->sym = 0; 34749566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3475546078acSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3476bccb9932SShri Abhyankar 3477450b117fSShri Abhyankar B->ops->view = MatView_MUMPS; 3478722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 34792205254eSKarl Rupp 34809566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 34819566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 34829566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 34839566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 34849566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 34859566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 34869566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 34879566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 34889566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 34899566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 34909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 34915c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 34929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 34939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3494450b117fSShri Abhyankar 349500c67f3bSHong Zhang /* set solvertype */ 34969566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 34979566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 34989566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 34992c7c0729SBarry Smith if (size == 1) { 35004ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3501f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 35022c7c0729SBarry Smith } 35037ee00b23SStefano Zampini B->ops->destroy = MatDestroy_MUMPS; 35047ee00b23SStefano Zampini B->data = (void *)mumps; 35057ee00b23SStefano Zampini 35067ee00b23SStefano Zampini *F = B; 
3507413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3508413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3509413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3510d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 35113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 35127ee00b23SStefano Zampini } 35137ee00b23SStefano Zampini 35147ee00b23SStefano Zampini /* MatGetFactor for Seq and MPI SELL matrices */ 3515d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3516d71ae5a4SJacob Faibussowitsch { 35177ee00b23SStefano Zampini Mat B; 35187ee00b23SStefano Zampini Mat_MUMPS *mumps; 35197ee00b23SStefano Zampini PetscBool isSeqSELL; 35202c7c0729SBarry Smith PetscMPIInt size; 35217ee00b23SStefano Zampini 35227ee00b23SStefano Zampini PetscFunctionBegin; 35237ee00b23SStefano Zampini /* Create the factorization matrix */ 35249566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 35259566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 35269566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 35279566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 35289566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 35297ee00b23SStefano Zampini 35304dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 35317ee00b23SStefano Zampini 35327ee00b23SStefano Zampini B->ops->view = MatView_MUMPS; 35337ee00b23SStefano Zampini B->ops->getinfo = MatGetInfo_MUMPS; 35347ee00b23SStefano Zampini 35359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 35369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 35379566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 35389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 35399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 35409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 35419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 35429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 35439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 35449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 35459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 35465c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 35477ee00b23SStefano Zampini 35487ee00b23SStefano Zampini if (ftype == MAT_FACTOR_LU) { 35497ee00b23SStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 35507ee00b23SStefano Zampini B->factortype = MAT_FACTOR_LU; 35517ee00b23SStefano Zampini if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 35527ee00b23SStefano Zampini else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 35537ee00b23SStefano Zampini mumps->sym = 0; 35549566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 35557ee00b23SStefano Zampini } 
else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 35567ee00b23SStefano Zampini 35577ee00b23SStefano Zampini /* set solvertype */ 35589566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 35599566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 35609566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 35612c7c0729SBarry Smith if (size == 1) { 35624ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3563f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 35642c7c0729SBarry Smith } 3565450b117fSShri Abhyankar B->ops->destroy = MatDestroy_MUMPS; 3566e69c285eSBarry Smith B->data = (void *)mumps; 35672205254eSKarl Rupp 3568450b117fSShri Abhyankar *F = B; 3569413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3570413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3571413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3572d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 35733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3574450b117fSShri Abhyankar } 357542c9c57cSBarry Smith 35769d0448ceSStefano Zampini /* MatGetFactor for MATNEST matrices */ 35779d0448ceSStefano Zampini static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 35789d0448ceSStefano Zampini { 35799d0448ceSStefano Zampini Mat B, **mats; 35809d0448ceSStefano Zampini Mat_MUMPS *mumps; 35819d0448ceSStefano Zampini PetscInt nr, nc; 35829d0448ceSStefano Zampini PetscMPIInt size; 358303e5aca4SStefano Zampini PetscBool flg = PETSC_TRUE; 35849d0448ceSStefano Zampini 35859d0448ceSStefano Zampini PetscFunctionBegin; 35869d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX) 358703e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 358803e5aca4SStefano Zampini 
PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 358903e5aca4SStefano Zampini *F = NULL; 359003e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 359103e5aca4SStefano Zampini } 35929d0448ceSStefano Zampini #endif 35939d0448ceSStefano Zampini 359403e5aca4SStefano Zampini /* Return if some condition is not satisfied */ 359503e5aca4SStefano Zampini *F = NULL; 35969d0448ceSStefano Zampini PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 35979d0448ceSStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY) { 35989d0448ceSStefano Zampini IS *rows, *cols; 35999d0448ceSStefano Zampini PetscInt *m, *M; 36009d0448ceSStefano Zampini 36019d0448ceSStefano Zampini PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 36029d0448ceSStefano Zampini PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 36039d0448ceSStefano Zampini PetscCall(MatNestGetISs(A, rows, cols)); 36049d0448ceSStefano Zampini for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 360503e5aca4SStefano Zampini if (!flg) { 360603e5aca4SStefano Zampini PetscCall(PetscFree2(rows, cols)); 360703e5aca4SStefano Zampini PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. 
Use MAT_FACTOR_LU.\n")); 360803e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 360903e5aca4SStefano Zampini } 36109d0448ceSStefano Zampini PetscCall(PetscMalloc2(nr, &m, nr, &M)); 36119d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 36129d0448ceSStefano Zampini for (PetscInt r = 0; flg && r < nr; r++) 36139d0448ceSStefano Zampini for (PetscInt k = r + 1; flg && k < nr; k++) 36149d0448ceSStefano Zampini if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 36159d0448ceSStefano Zampini PetscCall(PetscFree2(m, M)); 36169d0448ceSStefano Zampini PetscCall(PetscFree2(rows, cols)); 361703e5aca4SStefano Zampini if (!flg) { 361803e5aca4SStefano Zampini PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 361903e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 362003e5aca4SStefano Zampini } 36219d0448ceSStefano Zampini } 36229d0448ceSStefano Zampini 36239d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 36249d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 36259d0448ceSStefano Zampini Mat sub = mats[r][c]; 36265d955bbbSStefano Zampini PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans; 36279d0448ceSStefano Zampini 36289d0448ceSStefano Zampini if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 36295d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 36305d955bbbSStefano Zampini if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 36315d955bbbSStefano Zampini else { 36325d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 36335d955bbbSStefano Zampini if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 36345d955bbbSStefano Zampini } 36359d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, 
MATSEQAIJ, &isSeqAIJ)); 36369d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 36379d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 36389d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 36399d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 36409d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 36419d0448ceSStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY) { 364203e5aca4SStefano Zampini if (r == c && !isSeqAIJ && !isMPIAIJ && !isSeqSBAIJ && !isMPISBAIJ) { 364303e5aca4SStefano Zampini PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 364403e5aca4SStefano Zampini flg = PETSC_FALSE; 364503e5aca4SStefano Zampini } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 364603e5aca4SStefano Zampini PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 364703e5aca4SStefano Zampini flg = PETSC_FALSE; 364803e5aca4SStefano Zampini } 364903e5aca4SStefano Zampini } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 365003e5aca4SStefano Zampini PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 365103e5aca4SStefano Zampini flg = PETSC_FALSE; 36529d0448ceSStefano Zampini } 36539d0448ceSStefano Zampini } 365403e5aca4SStefano Zampini } 365503e5aca4SStefano Zampini if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 36569d0448ceSStefano Zampini 36579d0448ceSStefano Zampini /* Create the factorization matrix */ 36589d0448ceSStefano Zampini PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 36599d0448ceSStefano Zampini PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, 
A->rmap->N, A->cmap->N)); 36609d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 36619d0448ceSStefano Zampini PetscCall(MatSetUp(B)); 36629d0448ceSStefano Zampini 36639d0448ceSStefano Zampini PetscCall(PetscNew(&mumps)); 36649d0448ceSStefano Zampini 36659d0448ceSStefano Zampini B->ops->view = MatView_MUMPS; 36669d0448ceSStefano Zampini B->ops->getinfo = MatGetInfo_MUMPS; 36679d0448ceSStefano Zampini 36689d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 36699d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 36709d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 36719d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 36729d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 36739d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 36749d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 36759d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 36769d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 36779d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 36789d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 36799d0448ceSStefano Zampini 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 36809d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 36819d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 36829d0448ceSStefano Zampini 36839d0448ceSStefano Zampini if (ftype == MAT_FACTOR_LU) { 36849d0448ceSStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 36859d0448ceSStefano Zampini B->factortype = MAT_FACTOR_LU; 36869d0448ceSStefano Zampini mumps->sym = 0; 36879d0448ceSStefano Zampini } else { 36889d0448ceSStefano Zampini B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 36899d0448ceSStefano Zampini B->factortype = MAT_FACTOR_CHOLESKY; 36909d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX) 36919d0448ceSStefano Zampini mumps->sym = 2; 36929d0448ceSStefano Zampini #else 36939d0448ceSStefano Zampini if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 36949d0448ceSStefano Zampini else mumps->sym = 2; 36959d0448ceSStefano Zampini #endif 36969d0448ceSStefano Zampini } 36979d0448ceSStefano Zampini mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 36989d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 36999d0448ceSStefano Zampini 37009d0448ceSStefano Zampini PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 37019d0448ceSStefano Zampini if (size == 1) { 37029d0448ceSStefano Zampini /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 37039d0448ceSStefano Zampini B->canuseordering = PETSC_TRUE; 37049d0448ceSStefano Zampini } 37059d0448ceSStefano Zampini 37069d0448ceSStefano Zampini /* set solvertype */ 37079d0448ceSStefano Zampini PetscCall(PetscFree(B->solvertype)); 37089d0448ceSStefano Zampini 
PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 37099d0448ceSStefano Zampini B->ops->destroy = MatDestroy_MUMPS; 37109d0448ceSStefano Zampini B->data = (void *)mumps; 37119d0448ceSStefano Zampini 37129d0448ceSStefano Zampini *F = B; 37139d0448ceSStefano Zampini mumps->id.job = JOB_NULL; 37149d0448ceSStefano Zampini mumps->ICNTL_pre = NULL; 37159d0448ceSStefano Zampini mumps->CNTL_pre = NULL; 37169d0448ceSStefano Zampini mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 37179d0448ceSStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 37189d0448ceSStefano Zampini } 37199d0448ceSStefano Zampini 3720d71ae5a4SJacob Faibussowitsch /* MatSolverTypeRegister_MUMPS - registers the MUMPS factor-creation routines under the solver name MATSOLVERMUMPS via MatSolverTypeRegister(). Registered (matrix type, factor type) pairs: MATMPIAIJ and MATSEQAIJ with LU and Cholesky (MatGetFactor_aij_mumps); MATMPIBAIJ and MATSEQBAIJ with LU and Cholesky (MatGetFactor_baij_mumps); MATMPISBAIJ and MATSEQSBAIJ with Cholesky only (MatGetFactor_sbaij_mumps); MATSEQSELL with LU only (MatGetFactor_sell_mumps); MATNEST with LU and Cholesky (MatGetFactor_nest_mumps). Takes no arguments; every registration is wrapped in PetscCall() and the routine ends with PetscFunctionReturn(PETSC_SUCCESS). */ PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3721d71ae5a4SJacob Faibussowitsch { 372242c9c57cSBarry Smith PetscFunctionBegin; 37239566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 37249566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 37259566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 37269566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 37279566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 37289566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 37299566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 37309566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 37319566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, 
MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 37329566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 37339566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 37349d0448ceSStefano Zampini PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 37359d0448ceSStefano Zampini PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 37363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 373742c9c57cSBarry Smith } 3738