11c2a3de1SBarry Smith 2397b6df1SKris Buschelman /* 3c2b5dc30SHong Zhang Provides an interface to the MUMPS sparse solver 4397b6df1SKris Buschelman */ 567602552SJunchao Zhang #include <petscpkg_version.h> 69d0448ceSStefano Zampini #include <petscsf.h> 7c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 8c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 97ee00b23SStefano Zampini #include <../src/mat/impls/sell/mpi/mpisell.h> 10397b6df1SKris Buschelman 11397b6df1SKris Buschelman EXTERN_C_BEGIN 12397b6df1SKris Buschelman #if defined(PETSC_USE_COMPLEX) 132907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 142907cef9SHong Zhang #include <cmumps_c.h> 152907cef9SHong Zhang #else 16c6db04a5SJed Brown #include <zmumps_c.h> 172907cef9SHong Zhang #endif 182907cef9SHong Zhang #else 192907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 202907cef9SHong Zhang #include <smumps_c.h> 21397b6df1SKris Buschelman #else 22c6db04a5SJed Brown #include <dmumps_c.h> 23397b6df1SKris Buschelman #endif 242907cef9SHong Zhang #endif 25397b6df1SKris Buschelman EXTERN_C_END 26397b6df1SKris Buschelman #define JOB_INIT -1 27413bcc21SPierre Jolivet #define JOB_NULL 0 283d472b54SHong Zhang #define JOB_FACTSYMBOLIC 1 293d472b54SHong Zhang #define JOB_FACTNUMERIC 2 303d472b54SHong Zhang #define JOB_SOLVE 3 31397b6df1SKris Buschelman #define JOB_END -2 323d472b54SHong Zhang 332907cef9SHong Zhang /* calls to MUMPS */ 342907cef9SHong Zhang #if defined(PETSC_USE_COMPLEX) 352907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 363ab56b82SJunchao Zhang #define MUMPS_c cmumps_c 372907cef9SHong Zhang #else 383ab56b82SJunchao Zhang #define MUMPS_c zmumps_c 392907cef9SHong Zhang #endif 402907cef9SHong Zhang #else 412907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 423ab56b82SJunchao Zhang #define MUMPS_c smumps_c 432907cef9SHong Zhang #else 443ab56b82SJunchao Zhang #define MUMPS_c dmumps_c 452907cef9SHong Zhang #endif 462907cef9SHong Zhang #endif 
472907cef9SHong Zhang 48a6053eceSJunchao Zhang /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 49a6053eceSJunchao Zhang number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 50a6053eceSJunchao Zhang naming convention in PetscMPIInt, PetscBLASInt etc. 51a6053eceSJunchao Zhang */ 52a6053eceSJunchao Zhang typedef MUMPS_INT PetscMUMPSInt; 53a6053eceSJunchao Zhang 5467602552SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 5567602552SJunchao Zhang #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 56a6053eceSJunchao Zhang #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 5767602552SJunchao Zhang #endif 58a6053eceSJunchao Zhang #else 5967602552SJunchao Zhang #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 6067602552SJunchao Zhang #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 6167602552SJunchao Zhang #endif 6267602552SJunchao Zhang #endif 6367602552SJunchao Zhang 64a6053eceSJunchao Zhang #define MPIU_MUMPSINT MPI_INT 65a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MAX 2147483647 66a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MIN -2147483648 67a6053eceSJunchao Zhang 68a6053eceSJunchao Zhang /* Cast PetscInt to PetscMUMPSInt. 
Usually there is no overflow since <a> is row/col indices or some small integers*/ 69d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 70d71ae5a4SJacob Faibussowitsch { 71a6053eceSJunchao Zhang PetscFunctionBegin; 72ece88022SPierre Jolivet #if PetscDefined(USE_64BIT_INDICES) 732c71b3e2SJacob Faibussowitsch PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 74ece88022SPierre Jolivet #endif 75a6053eceSJunchao Zhang *b = (PetscMUMPSInt)(a); 763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 77a6053eceSJunchao Zhang } 78a6053eceSJunchao Zhang 79a6053eceSJunchao Zhang /* Put these utility routines here since they are only used in this file */ 80d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 81d71ae5a4SJacob Faibussowitsch { 82a6053eceSJunchao Zhang PetscInt myval; 83a6053eceSJunchao Zhang PetscBool myset; 84a6053eceSJunchao Zhang PetscFunctionBegin; 85a6053eceSJunchao Zhang /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 869566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 879566063dSJacob Faibussowitsch if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 88a6053eceSJunchao Zhang if (set) *set = myset; 893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 90a6053eceSJunchao Zhang } 91a6053eceSJunchao Zhang #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 92a6053eceSJunchao Zhang 93217d3b1eSJunchao Zhang /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 943ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 953ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 963ab56b82SJunchao Zhang do { \ 973ab56b82SJunchao Zhang if (mumps->use_petsc_omp_support) { \ 983ab56b82SJunchao Zhang if (mumps->is_omp_master) { \ 999566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 10014ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 10114ffdc6fSStefano Zampini PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 10214ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 1039566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 1043ab56b82SJunchao Zhang } \ 1059566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 106c3714a1dSJunchao Zhang /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 107c3714a1dSJunchao Zhang to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 108c3714a1dSJunchao Zhang an easy translation between omp_comm and petsc_comm). 
See MUMPS-5.1.2 manual p82. \ 109c3714a1dSJunchao Zhang omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 110c3714a1dSJunchao Zhang */ \ 111338d3105SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 112338d3105SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 113338d3105SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 114338d3105SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 1153ab56b82SJunchao Zhang } else { \ 11614ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 11714ffdc6fSStefano Zampini PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 11814ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 1193ab56b82SJunchao Zhang } \ 1203ab56b82SJunchao Zhang } while (0) 1213ab56b82SJunchao Zhang #else 1223ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 123d71ae5a4SJacob Faibussowitsch do { \ 12414ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 12514ffdc6fSStefano Zampini PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 12614ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 127d71ae5a4SJacob Faibussowitsch } while (0) 1283ab56b82SJunchao Zhang #endif 1293ab56b82SJunchao Zhang 130940cd9d6SSatish Balay /* declare MumpsScalar */ 131940cd9d6SSatish Balay #if defined(PETSC_USE_COMPLEX) 132940cd9d6SSatish Balay #if defined(PETSC_USE_REAL_SINGLE) 133940cd9d6SSatish Balay #define MumpsScalar mumps_complex 134940cd9d6SSatish Balay #else 135940cd9d6SSatish Balay #define MumpsScalar mumps_double_complex 136940cd9d6SSatish Balay #endif 137940cd9d6SSatish Balay #else 138940cd9d6SSatish 
Balay #define MumpsScalar PetscScalar 139940cd9d6SSatish Balay #endif 1403d472b54SHong Zhang 141397b6df1SKris Buschelman /* macros s.t. indices match MUMPS documentation */ 142397b6df1SKris Buschelman #define ICNTL(I) icntl[(I)-1] 143397b6df1SKris Buschelman #define CNTL(I) cntl[(I)-1] 144397b6df1SKris Buschelman #define INFOG(I) infog[(I)-1] 145a7aca84bSHong Zhang #define INFO(I) info[(I)-1] 146397b6df1SKris Buschelman #define RINFOG(I) rinfog[(I)-1] 147adc1d99fSHong Zhang #define RINFO(I) rinfo[(I)-1] 148397b6df1SKris Buschelman 149a6053eceSJunchao Zhang typedef struct Mat_MUMPS Mat_MUMPS; 150a6053eceSJunchao Zhang struct Mat_MUMPS { 151397b6df1SKris Buschelman #if defined(PETSC_USE_COMPLEX) 1522907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 1532907cef9SHong Zhang CMUMPS_STRUC_C id; 1542907cef9SHong Zhang #else 155397b6df1SKris Buschelman ZMUMPS_STRUC_C id; 1562907cef9SHong Zhang #endif 1572907cef9SHong Zhang #else 1582907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 1592907cef9SHong Zhang SMUMPS_STRUC_C id; 160397b6df1SKris Buschelman #else 161397b6df1SKris Buschelman DMUMPS_STRUC_C id; 162397b6df1SKris Buschelman #endif 1632907cef9SHong Zhang #endif 1642907cef9SHong Zhang 165397b6df1SKris Buschelman MatStructure matstruc; 1662d4298aeSJunchao Zhang PetscMPIInt myid, petsc_size; 167a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 168a6053eceSJunchao Zhang PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 169a6053eceSJunchao Zhang PetscInt64 nnz; /* number of nonzeros. 
The type is called selective 64-bit in mumps */ 170a6053eceSJunchao Zhang PetscMUMPSInt sym; 1712d4298aeSJunchao Zhang MPI_Comm mumps_comm; 172413bcc21SPierre Jolivet PetscMUMPSInt *ICNTL_pre; 173413bcc21SPierre Jolivet PetscReal *CNTL_pre; 174a6053eceSJunchao Zhang PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 175801fbe65SHong Zhang VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 17625aac85cSJunchao Zhang PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 17767602552SJunchao Zhang PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 17867602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 17967602552SJunchao Zhang PetscInt *rhs_nrow, max_nrhs; 18067602552SJunchao Zhang PetscMPIInt *rhs_recvcounts, *rhs_disps; 18167602552SJunchao Zhang PetscScalar *rhs_loc, *rhs_recvbuf; 18267602552SJunchao Zhang #endif 183801fbe65SHong Zhang Vec b_seq, x_seq; 184a6053eceSJunchao Zhang PetscInt ninfo, *info; /* which INFO to display */ 185b5fa320bSStefano Zampini PetscInt sizeredrhs; 18659ac8732SStefano Zampini PetscScalar *schur_sol; 18759ac8732SStefano Zampini PetscInt schur_sizesol; 188a6053eceSJunchao Zhang PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 189a6053eceSJunchao Zhang PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 190a6053eceSJunchao Zhang PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 1912205254eSKarl Rupp 1929d0448ceSStefano Zampini /* Support for MATNEST */ 1939d0448ceSStefano Zampini PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 1949d0448ceSStefano Zampini PetscInt64 *nest_vals_start; 1959d0448ceSStefano Zampini PetscScalar *nest_vals; 1969d0448ceSStefano Zampini 197a6053eceSJunchao Zhang /* stuff used by petsc/mumps OpenMP support*/ 1983ab56b82SJunchao Zhang PetscBool use_petsc_omp_support; 199da81f932SPierre Jolivet PetscOmpCtrl omp_ctrl; /* an OpenMP 
controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 2003ab56b82SJunchao Zhang MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 201a6053eceSJunchao Zhang PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 202a6053eceSJunchao Zhang PetscMPIInt tag, omp_comm_size; 2033ab56b82SJunchao Zhang PetscBool is_omp_master; /* is this rank the master of omp_comm */ 204a6053eceSJunchao Zhang MPI_Request *reqs; 205a6053eceSJunchao Zhang }; 2063ab56b82SJunchao Zhang 207a6053eceSJunchao Zhang /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 208a6053eceSJunchao Zhang Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 209a6053eceSJunchao Zhang */ 210d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 211d71ae5a4SJacob Faibussowitsch { 212a6053eceSJunchao Zhang PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 213f0c56d0fSKris Buschelman 214a6053eceSJunchao Zhang PetscFunctionBegin; 215a6053eceSJunchao Zhang #if defined(PETSC_USE_64BIT_INDICES) 216a6053eceSJunchao Zhang { 217a6053eceSJunchao Zhang PetscInt i; 218a6053eceSJunchao Zhang if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 2199566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 2209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 221a6053eceSJunchao Zhang mumps->cur_ilen = nrow + 1; 222a6053eceSJunchao Zhang } 223a6053eceSJunchao Zhang if (nnz > mumps->cur_jlen) { 2249566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 2259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 226a6053eceSJunchao Zhang mumps->cur_jlen = nnz; 227a6053eceSJunchao Zhang } 2289566063dSJacob Faibussowitsch for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 2299566063dSJacob Faibussowitsch for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 230a6053eceSJunchao Zhang *ia_mumps = mumps->ia_alloc; 231a6053eceSJunchao Zhang *ja_mumps = mumps->ja_alloc; 232a6053eceSJunchao Zhang } 233a6053eceSJunchao Zhang #else 234a6053eceSJunchao Zhang *ia_mumps = ia; 235a6053eceSJunchao Zhang *ja_mumps = ja; 236a6053eceSJunchao Zhang #endif 2379566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 2383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 239a6053eceSJunchao Zhang } 240b24902e0SBarry Smith 241d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 242d71ae5a4SJacob Faibussowitsch { 243b5fa320bSStefano Zampini PetscFunctionBegin; 2449566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.listvar_schur)); 2459566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 2469566063dSJacob 
Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 24759ac8732SStefano Zampini mumps->id.size_schur = 0; 248b3cb21ddSStefano Zampini mumps->id.schur_lld = 0; 24959ac8732SStefano Zampini mumps->id.ICNTL(19) = 0; 2503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25159ac8732SStefano Zampini } 25259ac8732SStefano Zampini 253b3cb21ddSStefano Zampini /* solve with rhs in mumps->id.redrhs and return in the same location */ 254d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 255d71ae5a4SJacob Faibussowitsch { 256b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 257b3cb21ddSStefano Zampini Mat S, B, X; 258b3cb21ddSStefano Zampini MatFactorSchurStatus schurstatus; 259b3cb21ddSStefano Zampini PetscInt sizesol; 26059ac8732SStefano Zampini 26159ac8732SStefano Zampini PetscFunctionBegin; 2629566063dSJacob Faibussowitsch PetscCall(MatFactorFactorizeSchurComplement(F)); 2639566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 2649566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 2659566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 266a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2679566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(B, S->boundtocpu)); 268a3d589ffSStefano Zampini #endif 269b3cb21ddSStefano Zampini switch (schurstatus) { 270d71ae5a4SJacob Faibussowitsch case MAT_FACTOR_SCHUR_FACTORED: 271d71ae5a4SJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 272d71ae5a4SJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 273a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2749566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, 
S->boundtocpu)); 275a3d589ffSStefano Zampini #endif 276b3cb21ddSStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 2779566063dSJacob Faibussowitsch PetscCall(MatMatSolveTranspose(S, B, X)); 27859ac8732SStefano Zampini } else { 2799566063dSJacob Faibussowitsch PetscCall(MatMatSolve(S, B, X)); 28059ac8732SStefano Zampini } 281b3cb21ddSStefano Zampini break; 282b3cb21ddSStefano Zampini case MAT_FACTOR_SCHUR_INVERTED: 283b3cb21ddSStefano Zampini sizesol = mumps->id.nrhs * mumps->id.size_schur; 28459ac8732SStefano Zampini if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 2859566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 2869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 28759ac8732SStefano Zampini mumps->schur_sizesol = sizesol; 288b5fa320bSStefano Zampini } 2899566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 2909566063dSJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 291a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2929566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, S->boundtocpu)); 293a3d589ffSStefano Zampini #endif 2949566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 29559ac8732SStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 2969566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 297b5fa320bSStefano Zampini } else { 2989566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 299b5fa320bSStefano Zampini } 3009566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(X)); 3019566063dSJacob Faibussowitsch PetscCall(MatProductSymbolic(X)); 3029566063dSJacob Faibussowitsch PetscCall(MatProductNumeric(X)); 3034417c5e8SHong Zhang 3049566063dSJacob Faibussowitsch PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 305b3cb21ddSStefano 
Zampini break; 306d71ae5a4SJacob Faibussowitsch default: 307d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 30859ac8732SStefano Zampini } 3099566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 3109566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 3119566063dSJacob Faibussowitsch PetscCall(MatDestroy(&X)); 3123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 313b5fa320bSStefano Zampini } 314b5fa320bSStefano Zampini 315d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 316d71ae5a4SJacob Faibussowitsch { 317b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 318b5fa320bSStefano Zampini 319b5fa320bSStefano Zampini PetscFunctionBegin; 320b5fa320bSStefano Zampini if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 3213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 322b5fa320bSStefano Zampini } 323b8f61ee1SStefano Zampini if (!expansion) { /* prepare for the condensation step */ 324b5fa320bSStefano Zampini PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 325b5fa320bSStefano Zampini /* allocate MUMPS internal array to store reduced right-hand sides */ 326b5fa320bSStefano Zampini if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 3279566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 328b5fa320bSStefano Zampini mumps->id.lredrhs = mumps->id.size_schur; 3299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 330b5fa320bSStefano Zampini mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 331b5fa320bSStefano Zampini } 332b5fa320bSStefano Zampini mumps->id.ICNTL(26) = 1; /* condensation phase */ 333b5fa320bSStefano Zampini } else { /* prepare for the expansion step */ 334b8f61ee1SStefano Zampini /* 
solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 3359566063dSJacob Faibussowitsch PetscCall(MatMumpsSolveSchur_Private(F)); 336b5fa320bSStefano Zampini mumps->id.ICNTL(26) = 2; /* expansion phase */ 3373ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 33808401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 339b5fa320bSStefano Zampini /* restore defaults */ 340b5fa320bSStefano Zampini mumps->id.ICNTL(26) = -1; 341d3d598ffSStefano Zampini /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 342d3d598ffSStefano Zampini if (mumps->id.nrhs > 1) { 3439566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 344d3d598ffSStefano Zampini mumps->id.lredrhs = 0; 345d3d598ffSStefano Zampini mumps->sizeredrhs = 0; 346d3d598ffSStefano Zampini } 347b5fa320bSStefano Zampini } 3483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 349b5fa320bSStefano Zampini } 350b5fa320bSStefano Zampini 351397b6df1SKris Buschelman /* 352d341cd04SHong Zhang MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 353d341cd04SHong Zhang 354397b6df1SKris Buschelman input: 35575480915SPierre Jolivet A - matrix in aij,baij or sbaij format 356397b6df1SKris Buschelman shift - 0: C style output triple; 1: Fortran style output triple. 
357bccb9932SShri Abhyankar reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 358bccb9932SShri Abhyankar MAT_REUSE_MATRIX: only the values in v array are updated 359397b6df1SKris Buschelman output: 360397b6df1SKris Buschelman nnz - dim of r, c, and v (number of local nonzero entries of A) 361397b6df1SKris Buschelman r, c, v - row and col index, matrix values (matrix triples) 362eb9baa12SBarry Smith 363eb9baa12SBarry Smith The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 3647ee00b23SStefano Zampini freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 365eb9baa12SBarry Smith that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 366eb9baa12SBarry Smith 367397b6df1SKris Buschelman */ 36816ebf90aSShri Abhyankar 369d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 370d71ae5a4SJacob Faibussowitsch { 371a3d589ffSStefano Zampini const PetscScalar *av; 372185f6596SHong Zhang const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 373a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j, k; 374a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 37516ebf90aSShri Abhyankar Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 376397b6df1SKris Buschelman 377397b6df1SKris Buschelman PetscFunctionBegin; 3789566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 379a6053eceSJunchao Zhang mumps->val = (PetscScalar *)av; 380bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 3812205254eSKarl Rupp nz = aa->nz; 3822205254eSKarl Rupp ai = aa->i; 3832205254eSKarl Rupp aj = aa->j; 3849566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 385a6053eceSJunchao Zhang for (i = k = 0; i < M; i++) { 38616ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 38767877ebaSShri Abhyankar ajj = aj + ai[i]; 38867877ebaSShri Abhyankar 
for (j = 0; j < rnz; j++) { 3899566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 3909566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 391a6053eceSJunchao Zhang k++; 39216ebf90aSShri Abhyankar } 39316ebf90aSShri Abhyankar } 394a6053eceSJunchao Zhang mumps->irn = row; 395a6053eceSJunchao Zhang mumps->jcn = col; 396a6053eceSJunchao Zhang mumps->nnz = nz; 39716ebf90aSShri Abhyankar } 3989566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 3993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 40016ebf90aSShri Abhyankar } 401397b6df1SKris Buschelman 402d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 403d71ae5a4SJacob Faibussowitsch { 404a6053eceSJunchao Zhang PetscInt64 nz, i, j, k, r; 4057ee00b23SStefano Zampini Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 406a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 4077ee00b23SStefano Zampini 4087ee00b23SStefano Zampini PetscFunctionBegin; 409a6053eceSJunchao Zhang mumps->val = a->val; 4107ee00b23SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 4117ee00b23SStefano Zampini nz = a->sliidx[a->totalslices]; 4129566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 413a6053eceSJunchao Zhang for (i = k = 0; i < a->totalslices; i++) { 41448a46eb9SPierre Jolivet for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 4157ee00b23SStefano Zampini } 4169566063dSJacob Faibussowitsch for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 417a6053eceSJunchao Zhang mumps->irn = row; 418a6053eceSJunchao Zhang mumps->jcn = col; 419a6053eceSJunchao Zhang mumps->nnz = nz; 4207ee00b23SStefano Zampini } 4213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4227ee00b23SStefano Zampini } 4237ee00b23SStefano Zampini 
424d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 425d71ae5a4SJacob Faibussowitsch { 42667877ebaSShri Abhyankar Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 42733d57670SJed Brown const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 428a6053eceSJunchao Zhang PetscInt64 M, nz, idx = 0, rnz, i, j, k, m; 429a6053eceSJunchao Zhang PetscInt bs; 430a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 43167877ebaSShri Abhyankar 43267877ebaSShri Abhyankar PetscFunctionBegin; 4339566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 43433d57670SJed Brown M = A->rmap->N / bs; 435a6053eceSJunchao Zhang mumps->val = aa->a; 436bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 4379371c9d4SSatish Balay ai = aa->i; 4389371c9d4SSatish Balay aj = aa->j; 43967877ebaSShri Abhyankar nz = bs2 * aa->nz; 4409566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 44167877ebaSShri Abhyankar for (i = 0; i < M; i++) { 44267877ebaSShri Abhyankar ajj = aj + ai[i]; 44367877ebaSShri Abhyankar rnz = ai[i + 1] - ai[i]; 44467877ebaSShri Abhyankar for (k = 0; k < rnz; k++) { 44567877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 44667877ebaSShri Abhyankar for (m = 0; m < bs; m++) { 4479566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 4489566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 449a6053eceSJunchao Zhang idx++; 45067877ebaSShri Abhyankar } 45167877ebaSShri Abhyankar } 45267877ebaSShri Abhyankar } 45367877ebaSShri Abhyankar } 454a6053eceSJunchao Zhang mumps->irn = row; 455a6053eceSJunchao Zhang mumps->jcn = col; 456a6053eceSJunchao Zhang mumps->nnz = nz; 45767877ebaSShri Abhyankar } 4583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 45967877ebaSShri Abhyankar } 46067877ebaSShri Abhyankar 461d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, 
PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 462d71ae5a4SJacob Faibussowitsch { 46375480915SPierre Jolivet const PetscInt *ai, *aj, *ajj; 464a6053eceSJunchao Zhang PetscInt bs; 465a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j, k, m; 466a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 46775480915SPierre Jolivet PetscScalar *val; 46816ebf90aSShri Abhyankar Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 46975480915SPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 47038548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 471b94d7dedSBarry Smith PetscBool isset, hermitian; 47238548759SBarry Smith #endif 47316ebf90aSShri Abhyankar 47416ebf90aSShri Abhyankar PetscFunctionBegin; 47538548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 476b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 477b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 47838548759SBarry Smith #endif 4792205254eSKarl Rupp ai = aa->i; 4802205254eSKarl Rupp aj = aa->j; 4819566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 48275480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 483f3fa974cSJacob Faibussowitsch const PetscInt64 alloc_size = aa->nz * bs2; 484f3fa974cSJacob Faibussowitsch 485f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 486a6053eceSJunchao Zhang if (bs > 1) { 487f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 488a6053eceSJunchao Zhang mumps->val = mumps->val_alloc; 48975480915SPierre Jolivet } else { 490a6053eceSJunchao Zhang mumps->val = aa->a; 49175480915SPierre Jolivet } 492a6053eceSJunchao Zhang mumps->irn = row; 493a6053eceSJunchao Zhang mumps->jcn = col; 494a6053eceSJunchao Zhang } else { 495a6053eceSJunchao Zhang if (bs == 1) mumps->val = aa->a; 496a6053eceSJunchao Zhang row = mumps->irn; 497a6053eceSJunchao Zhang col = 
mumps->jcn; 498a6053eceSJunchao Zhang } 499a6053eceSJunchao Zhang val = mumps->val; 500185f6596SHong Zhang 50116ebf90aSShri Abhyankar nz = 0; 502a81fe166SPierre Jolivet if (bs > 1) { 50375480915SPierre Jolivet for (i = 0; i < mbs; i++) { 50416ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 50567877ebaSShri Abhyankar ajj = aj + ai[i]; 50675480915SPierre Jolivet for (j = 0; j < rnz; j++) { 50775480915SPierre Jolivet for (k = 0; k < bs; k++) { 50875480915SPierre Jolivet for (m = 0; m < bs; m++) { 509ec4f40fdSPierre Jolivet if (ajj[j] > i || k >= m) { 51075480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 5119566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 5129566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 51375480915SPierre Jolivet } 51475480915SPierre Jolivet val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 51575480915SPierre Jolivet } 51675480915SPierre Jolivet } 51775480915SPierre Jolivet } 51875480915SPierre Jolivet } 51975480915SPierre Jolivet } 520a81fe166SPierre Jolivet } else if (reuse == MAT_INITIAL_MATRIX) { 521a81fe166SPierre Jolivet for (i = 0; i < mbs; i++) { 522a81fe166SPierre Jolivet rnz = ai[i + 1] - ai[i]; 523a81fe166SPierre Jolivet ajj = aj + ai[i]; 524a81fe166SPierre Jolivet for (j = 0; j < rnz; j++) { 5259566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 5269566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 527a6053eceSJunchao Zhang nz++; 528a81fe166SPierre Jolivet } 529a81fe166SPierre Jolivet } 53008401ef6SPierre Jolivet PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 53175480915SPierre Jolivet } 532a6053eceSJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 5333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 53416ebf90aSShri Abhyankar } 53516ebf90aSShri Abhyankar 
536d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 537d71ae5a4SJacob Faibussowitsch { 53867877ebaSShri Abhyankar const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 539a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j; 54067877ebaSShri Abhyankar const PetscScalar *av, *v1; 54116ebf90aSShri Abhyankar PetscScalar *val; 542a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 543829b1710SHong Zhang Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 54429b521d4Sstefano_zampini PetscBool missing; 54538548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 546b94d7dedSBarry Smith PetscBool hermitian, isset; 54738548759SBarry Smith #endif 54816ebf90aSShri Abhyankar 54916ebf90aSShri Abhyankar PetscFunctionBegin; 55038548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 551b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 552b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 55338548759SBarry Smith #endif 5549566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 5559371c9d4SSatish Balay ai = aa->i; 5569371c9d4SSatish Balay aj = aa->j; 55716ebf90aSShri Abhyankar adiag = aa->diag; 5589566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 559bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 5607ee00b23SStefano Zampini /* count nz in the upper triangular part of A */ 561829b1710SHong Zhang nz = 0; 56229b521d4Sstefano_zampini if (missing) { 56329b521d4Sstefano_zampini for (i = 0; i < M; i++) { 56429b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 56529b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 56629b521d4Sstefano_zampini if (aj[j] < i) continue; 56729b521d4Sstefano_zampini nz++; 56829b521d4Sstefano_zampini } 56929b521d4Sstefano_zampini } else { 57029b521d4Sstefano_zampini nz += 
ai[i + 1] - adiag[i]; 57129b521d4Sstefano_zampini } 57229b521d4Sstefano_zampini } 57329b521d4Sstefano_zampini } else { 574829b1710SHong Zhang for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 57529b521d4Sstefano_zampini } 5769566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 5779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 578a6053eceSJunchao Zhang mumps->nnz = nz; 579a6053eceSJunchao Zhang mumps->irn = row; 580a6053eceSJunchao Zhang mumps->jcn = col; 581a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 582185f6596SHong Zhang 58316ebf90aSShri Abhyankar nz = 0; 58429b521d4Sstefano_zampini if (missing) { 58529b521d4Sstefano_zampini for (i = 0; i < M; i++) { 58629b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 58729b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 58829b521d4Sstefano_zampini if (aj[j] < i) continue; 5899566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 5909566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 59129b521d4Sstefano_zampini val[nz] = av[j]; 59229b521d4Sstefano_zampini nz++; 59329b521d4Sstefano_zampini } 59429b521d4Sstefano_zampini } else { 59529b521d4Sstefano_zampini rnz = ai[i + 1] - adiag[i]; 59629b521d4Sstefano_zampini ajj = aj + adiag[i]; 59729b521d4Sstefano_zampini v1 = av + adiag[i]; 59829b521d4Sstefano_zampini for (j = 0; j < rnz; j++) { 5999566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 6009566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 601a6053eceSJunchao Zhang val[nz++] = v1[j]; 60229b521d4Sstefano_zampini } 60329b521d4Sstefano_zampini } 60429b521d4Sstefano_zampini } 60529b521d4Sstefano_zampini } else { 60616ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 60716ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 60867877ebaSShri Abhyankar ajj = aj + adiag[i]; 609cf3759fdSShri Abhyankar v1 = av + adiag[i]; 
61067877ebaSShri Abhyankar for (j = 0; j < rnz; j++) { 6119566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 6129566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 613a6053eceSJunchao Zhang val[nz++] = v1[j]; 61416ebf90aSShri Abhyankar } 61516ebf90aSShri Abhyankar } 61629b521d4Sstefano_zampini } 617397b6df1SKris Buschelman } else { 618a6053eceSJunchao Zhang nz = 0; 619a6053eceSJunchao Zhang val = mumps->val; 62029b521d4Sstefano_zampini if (missing) { 62116ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 62229b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 62329b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 62429b521d4Sstefano_zampini if (aj[j] < i) continue; 62529b521d4Sstefano_zampini val[nz++] = av[j]; 62629b521d4Sstefano_zampini } 62729b521d4Sstefano_zampini } else { 62816ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 62967877ebaSShri Abhyankar v1 = av + adiag[i]; 630ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 63116ebf90aSShri Abhyankar } 63216ebf90aSShri Abhyankar } 63329b521d4Sstefano_zampini } else { 63416ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 63516ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 63616ebf90aSShri Abhyankar v1 = av + adiag[i]; 637ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 63816ebf90aSShri Abhyankar } 63916ebf90aSShri Abhyankar } 64029b521d4Sstefano_zampini } 6419566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 6423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 64316ebf90aSShri Abhyankar } 64416ebf90aSShri Abhyankar 645d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 646d71ae5a4SJacob Faibussowitsch { 647a6053eceSJunchao Zhang const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 648a6053eceSJunchao Zhang PetscInt bs; 649a6053eceSJunchao Zhang 
PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 650a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 65116ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 65216ebf90aSShri Abhyankar PetscScalar *val; 653397b6df1SKris Buschelman Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 654397b6df1SKris Buschelman Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 655397b6df1SKris Buschelman Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 656ec4f40fdSPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 65738548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 658b94d7dedSBarry Smith PetscBool hermitian, isset; 65938548759SBarry Smith #endif 66016ebf90aSShri Abhyankar 66116ebf90aSShri Abhyankar PetscFunctionBegin; 66238548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 663b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 664b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 66538548759SBarry Smith #endif 6669566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 66738548759SBarry Smith rstart = A->rmap->rstart; 66838548759SBarry Smith ai = aa->i; 66938548759SBarry Smith aj = aa->j; 67038548759SBarry Smith bi = bb->i; 67138548759SBarry Smith bj = bb->j; 67238548759SBarry Smith av = aa->a; 67338548759SBarry Smith bv = bb->a; 674397b6df1SKris Buschelman 6752205254eSKarl Rupp garray = mat->garray; 6762205254eSKarl Rupp 677bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 678a6053eceSJunchao Zhang nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 6799566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 6809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 681a6053eceSJunchao Zhang /* can not decide the exact mumps->nnz now because of the SBAIJ */ 682a6053eceSJunchao Zhang mumps->irn = row; 683a6053eceSJunchao Zhang mumps->jcn = col; 
684a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 685397b6df1SKris Buschelman } else { 686a6053eceSJunchao Zhang val = mumps->val; 687397b6df1SKris Buschelman } 688397b6df1SKris Buschelman 6899371c9d4SSatish Balay jj = 0; 6909371c9d4SSatish Balay irow = rstart; 691ec4f40fdSPierre Jolivet for (i = 0; i < mbs; i++) { 692397b6df1SKris Buschelman ajj = aj + ai[i]; /* ptr to the beginning of this row */ 693397b6df1SKris Buschelman countA = ai[i + 1] - ai[i]; 694397b6df1SKris Buschelman countB = bi[i + 1] - bi[i]; 695397b6df1SKris Buschelman bjj = bj + bi[i]; 696ec4f40fdSPierre Jolivet v1 = av + ai[i] * bs2; 697ec4f40fdSPierre Jolivet v2 = bv + bi[i] * bs2; 698397b6df1SKris Buschelman 699ec4f40fdSPierre Jolivet if (bs > 1) { 700ec4f40fdSPierre Jolivet /* A-part */ 701ec4f40fdSPierre Jolivet for (j = 0; j < countA; j++) { 702ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 703ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 704ec4f40fdSPierre Jolivet if (rstart + ajj[j] * bs > irow || k >= m) { 705ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 7069566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 7079566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 708ec4f40fdSPierre Jolivet } 709ec4f40fdSPierre Jolivet val[jj++] = v1[j * bs2 + m + k * bs]; 710ec4f40fdSPierre Jolivet } 711ec4f40fdSPierre Jolivet } 712ec4f40fdSPierre Jolivet } 713ec4f40fdSPierre Jolivet } 714ec4f40fdSPierre Jolivet 715ec4f40fdSPierre Jolivet /* B-part */ 716ec4f40fdSPierre Jolivet for (j = 0; j < countB; j++) { 717ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 718ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 719ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 7209566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 7219566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 
722ec4f40fdSPierre Jolivet } 723ec4f40fdSPierre Jolivet val[jj++] = v2[j * bs2 + m + k * bs]; 724ec4f40fdSPierre Jolivet } 725ec4f40fdSPierre Jolivet } 726ec4f40fdSPierre Jolivet } 727ec4f40fdSPierre Jolivet } else { 728397b6df1SKris Buschelman /* A-part */ 729397b6df1SKris Buschelman for (j = 0; j < countA; j++) { 730bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7319566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 7329566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 733397b6df1SKris Buschelman } 73416ebf90aSShri Abhyankar val[jj++] = v1[j]; 735397b6df1SKris Buschelman } 73616ebf90aSShri Abhyankar 73716ebf90aSShri Abhyankar /* B-part */ 73816ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 739bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7409566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 7419566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 742397b6df1SKris Buschelman } 74316ebf90aSShri Abhyankar val[jj++] = v2[j]; 74416ebf90aSShri Abhyankar } 74516ebf90aSShri Abhyankar } 746ec4f40fdSPierre Jolivet irow += bs; 747ec4f40fdSPierre Jolivet } 7485d955bbbSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 7493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 75016ebf90aSShri Abhyankar } 75116ebf90aSShri Abhyankar 752d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 753d71ae5a4SJacob Faibussowitsch { 75416ebf90aSShri Abhyankar const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 7555d955bbbSStefano Zampini PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 756a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 75716ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 75816ebf90aSShri Abhyankar PetscScalar *val; 759a3d589ffSStefano Zampini 
Mat Ad, Ao; 760a3d589ffSStefano Zampini Mat_SeqAIJ *aa; 761a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 76216ebf90aSShri Abhyankar 76316ebf90aSShri Abhyankar PetscFunctionBegin; 7649566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 7659566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 7669566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 767a3d589ffSStefano Zampini 768a3d589ffSStefano Zampini aa = (Mat_SeqAIJ *)(Ad)->data; 769a3d589ffSStefano Zampini bb = (Mat_SeqAIJ *)(Ao)->data; 77038548759SBarry Smith ai = aa->i; 77138548759SBarry Smith aj = aa->j; 77238548759SBarry Smith bi = bb->i; 77338548759SBarry Smith bj = bb->j; 77416ebf90aSShri Abhyankar 775a3d589ffSStefano Zampini rstart = A->rmap->rstart; 7765d955bbbSStefano Zampini cstart = A->cmap->rstart; 7772205254eSKarl Rupp 778bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 779a6053eceSJunchao Zhang nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 7809566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 7819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 782a6053eceSJunchao Zhang mumps->nnz = nz; 783a6053eceSJunchao Zhang mumps->irn = row; 784a6053eceSJunchao Zhang mumps->jcn = col; 785a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 78616ebf90aSShri Abhyankar } else { 787a6053eceSJunchao Zhang val = mumps->val; 78816ebf90aSShri Abhyankar } 78916ebf90aSShri Abhyankar 7909371c9d4SSatish Balay jj = 0; 7919371c9d4SSatish Balay irow = rstart; 79216ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 79316ebf90aSShri Abhyankar ajj = aj + ai[i]; /* ptr to the beginning of this row */ 79416ebf90aSShri Abhyankar countA = ai[i + 1] - ai[i]; 79516ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 79616ebf90aSShri Abhyankar bjj = bj + bi[i]; 79716ebf90aSShri Abhyankar v1 = av + ai[i]; 79816ebf90aSShri Abhyankar v2 = bv + bi[i]; 79916ebf90aSShri Abhyankar 
80016ebf90aSShri Abhyankar /* A-part */ 80116ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 802bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8039566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 8045d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 80516ebf90aSShri Abhyankar } 80616ebf90aSShri Abhyankar val[jj++] = v1[j]; 80716ebf90aSShri Abhyankar } 80816ebf90aSShri Abhyankar 80916ebf90aSShri Abhyankar /* B-part */ 81016ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 811bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8129566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 8139566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 81416ebf90aSShri Abhyankar } 81516ebf90aSShri Abhyankar val[jj++] = v2[j]; 81616ebf90aSShri Abhyankar } 81716ebf90aSShri Abhyankar irow++; 81816ebf90aSShri Abhyankar } 8199566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 8209566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 8213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 82216ebf90aSShri Abhyankar } 82316ebf90aSShri Abhyankar 824d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 825d71ae5a4SJacob Faibussowitsch { 82667877ebaSShri Abhyankar Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 82767877ebaSShri Abhyankar Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 82867877ebaSShri Abhyankar Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 82967877ebaSShri Abhyankar const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 8305d955bbbSStefano Zampini const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 83133d57670SJed Brown const PetscInt bs2 = mat->bs2; 832a6053eceSJunchao Zhang PetscInt bs; 
833a6053eceSJunchao Zhang PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 834a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 83567877ebaSShri Abhyankar const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 83667877ebaSShri Abhyankar PetscScalar *val; 83767877ebaSShri Abhyankar 83867877ebaSShri Abhyankar PetscFunctionBegin; 8399566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 840bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 84167877ebaSShri Abhyankar nz = bs2 * (aa->nz + bb->nz); 8429566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 8439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 844a6053eceSJunchao Zhang mumps->nnz = nz; 845a6053eceSJunchao Zhang mumps->irn = row; 846a6053eceSJunchao Zhang mumps->jcn = col; 847a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 84867877ebaSShri Abhyankar } else { 849a6053eceSJunchao Zhang val = mumps->val; 85067877ebaSShri Abhyankar } 85167877ebaSShri Abhyankar 8529371c9d4SSatish Balay jj = 0; 8539371c9d4SSatish Balay irow = rstart; 85467877ebaSShri Abhyankar for (i = 0; i < mbs; i++) { 85567877ebaSShri Abhyankar countA = ai[i + 1] - ai[i]; 85667877ebaSShri Abhyankar countB = bi[i + 1] - bi[i]; 85767877ebaSShri Abhyankar ajj = aj + ai[i]; 85867877ebaSShri Abhyankar bjj = bj + bi[i]; 85967877ebaSShri Abhyankar v1 = av + bs2 * ai[i]; 86067877ebaSShri Abhyankar v2 = bv + bs2 * bi[i]; 86167877ebaSShri Abhyankar 86267877ebaSShri Abhyankar idx = 0; 86367877ebaSShri Abhyankar /* A-part */ 86467877ebaSShri Abhyankar for (k = 0; k < countA; k++) { 86567877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 86667877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 867bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8689566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 8695d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 87067877ebaSShri Abhyankar } 87167877ebaSShri 
Abhyankar val[jj++] = v1[idx++]; 87267877ebaSShri Abhyankar } 87367877ebaSShri Abhyankar } 87467877ebaSShri Abhyankar } 87567877ebaSShri Abhyankar 87667877ebaSShri Abhyankar idx = 0; 87767877ebaSShri Abhyankar /* B-part */ 87867877ebaSShri Abhyankar for (k = 0; k < countB; k++) { 87967877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 88067877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 881bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8829566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 8839566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 88467877ebaSShri Abhyankar } 885d985c460SShri Abhyankar val[jj++] = v2[idx++]; 88667877ebaSShri Abhyankar } 88767877ebaSShri Abhyankar } 88867877ebaSShri Abhyankar } 889d985c460SShri Abhyankar irow += bs; 89067877ebaSShri Abhyankar } 8913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 89267877ebaSShri Abhyankar } 89367877ebaSShri Abhyankar 894d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 895d71ae5a4SJacob Faibussowitsch { 89616ebf90aSShri Abhyankar const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 897a6053eceSJunchao Zhang PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 898a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 89916ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 90016ebf90aSShri Abhyankar PetscScalar *val; 901a3d589ffSStefano Zampini Mat Ad, Ao; 902a3d589ffSStefano Zampini Mat_SeqAIJ *aa; 903a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 90438548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 905b94d7dedSBarry Smith PetscBool hermitian, isset; 90638548759SBarry Smith #endif 90716ebf90aSShri Abhyankar 90816ebf90aSShri Abhyankar PetscFunctionBegin; 90938548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 910b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, 
&isset, &hermitian)); 911b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 91238548759SBarry Smith #endif 9139566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 9149566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 9159566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 916a3d589ffSStefano Zampini 917a3d589ffSStefano Zampini aa = (Mat_SeqAIJ *)(Ad)->data; 918a3d589ffSStefano Zampini bb = (Mat_SeqAIJ *)(Ao)->data; 91938548759SBarry Smith ai = aa->i; 92038548759SBarry Smith aj = aa->j; 92138548759SBarry Smith adiag = aa->diag; 92238548759SBarry Smith bi = bb->i; 92338548759SBarry Smith bj = bb->j; 9242205254eSKarl Rupp 92516ebf90aSShri Abhyankar rstart = A->rmap->rstart; 92616ebf90aSShri Abhyankar 927bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 928e0bace9bSHong Zhang nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 929e0bace9bSHong Zhang nzb = 0; /* num of upper triangular entries in mat->B */ 93016ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 931e0bace9bSHong Zhang nza += (ai[i + 1] - adiag[i]); 93216ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 93316ebf90aSShri Abhyankar bjj = bj + bi[i]; 934e0bace9bSHong Zhang for (j = 0; j < countB; j++) { 935e0bace9bSHong Zhang if (garray[bjj[j]] > rstart) nzb++; 936e0bace9bSHong Zhang } 937e0bace9bSHong Zhang } 93816ebf90aSShri Abhyankar 939e0bace9bSHong Zhang nz = nza + nzb; /* total nz of upper triangular part of mat */ 9409566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 9419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 942a6053eceSJunchao Zhang mumps->nnz = nz; 943a6053eceSJunchao Zhang mumps->irn = row; 944a6053eceSJunchao Zhang mumps->jcn = col; 945a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 94616ebf90aSShri Abhyankar } else 
{ 947a6053eceSJunchao Zhang val = mumps->val; 94816ebf90aSShri Abhyankar } 94916ebf90aSShri Abhyankar 9509371c9d4SSatish Balay jj = 0; 9519371c9d4SSatish Balay irow = rstart; 95216ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 95316ebf90aSShri Abhyankar ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 95416ebf90aSShri Abhyankar v1 = av + adiag[i]; 95516ebf90aSShri Abhyankar countA = ai[i + 1] - adiag[i]; 95616ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 95716ebf90aSShri Abhyankar bjj = bj + bi[i]; 95816ebf90aSShri Abhyankar v2 = bv + bi[i]; 95916ebf90aSShri Abhyankar 96016ebf90aSShri Abhyankar /* A-part */ 96116ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 962bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 9639566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 9649566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 96516ebf90aSShri Abhyankar } 96616ebf90aSShri Abhyankar val[jj++] = v1[j]; 96716ebf90aSShri Abhyankar } 96816ebf90aSShri Abhyankar 96916ebf90aSShri Abhyankar /* B-part */ 97016ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 97116ebf90aSShri Abhyankar if (garray[bjj[j]] > rstart) { 972bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 9739566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 9749566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 97516ebf90aSShri Abhyankar } 97616ebf90aSShri Abhyankar val[jj++] = v2[j]; 97716ebf90aSShri Abhyankar } 978397b6df1SKris Buschelman } 979397b6df1SKris Buschelman irow++; 980397b6df1SKris Buschelman } 9819566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 9829566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 9833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 984397b6df1SKris Buschelman } 985397b6df1SKris Buschelman 9869d0448ceSStefano Zampini 
PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 9879d0448ceSStefano Zampini { 9889d0448ceSStefano Zampini Mat **mats; 9899d0448ceSStefano Zampini PetscInt nr, nc; 9909d0448ceSStefano Zampini PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 9919d0448ceSStefano Zampini 9929d0448ceSStefano Zampini PetscFunctionBegin; 9939d0448ceSStefano Zampini PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 9949d0448ceSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 9959d0448ceSStefano Zampini PetscMUMPSInt *irns, *jcns; 9969d0448ceSStefano Zampini PetscScalar *vals; 9979d0448ceSStefano Zampini PetscInt64 totnnz, cumnnz, maxnnz; 9989d0448ceSStefano Zampini PetscInt *pjcns_w; 9999d0448ceSStefano Zampini IS *rows, *cols; 10009d0448ceSStefano Zampini PetscInt **rows_idx, **cols_idx; 10019d0448ceSStefano Zampini 10029d0448ceSStefano Zampini cumnnz = 0; 10039d0448ceSStefano Zampini maxnnz = 0; 10045d955bbbSStefano Zampini PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 10059d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 10069d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 10079d0448ceSStefano Zampini Mat sub = mats[r][c]; 10089d0448ceSStefano Zampini 10099d0448ceSStefano Zampini mumps->nest_convert_to_triples[r * nc + c] = NULL; 10109d0448ceSStefano Zampini if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 10119d0448ceSStefano Zampini if (sub) { 10129d0448ceSStefano Zampini PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 10135d955bbbSStefano Zampini PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE; 10149d0448ceSStefano Zampini MatInfo info; 10159d0448ceSStefano Zampini 10165d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 10175d955bbbSStefano Zampini if (isTrans) 
PetscCall(MatTransposeGetMat(sub, &sub)); 10185d955bbbSStefano Zampini else { 10195d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 10205d955bbbSStefano Zampini if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 10215d955bbbSStefano Zampini } 10229d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 10239d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 10249d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 10259d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 10269d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 10279d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 10289d0448ceSStefano Zampini 10299d0448ceSStefano Zampini if (chol) { 10309d0448ceSStefano Zampini if (r == c) { 10319d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 10329d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 10339d0448ceSStefano Zampini else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 10349d0448ceSStefano Zampini else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 10359d0448ceSStefano Zampini } else { 10369d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 10379d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 10389d0448ceSStefano Zampini else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 10399d0448ceSStefano Zampini else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 10409d0448ceSStefano 
Zampini } 10419d0448ceSStefano Zampini } else { 10429d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 10439d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 10449d0448ceSStefano Zampini else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 10459d0448ceSStefano Zampini else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 10469d0448ceSStefano Zampini } 10479d0448ceSStefano Zampini PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 10489d0448ceSStefano Zampini mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 10499d0448ceSStefano Zampini PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 10509d0448ceSStefano Zampini cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 10519d0448ceSStefano Zampini maxnnz = PetscMax(maxnnz, info.nz_used); 10529d0448ceSStefano Zampini } 10539d0448ceSStefano Zampini } 10549d0448ceSStefano Zampini } 10559d0448ceSStefano Zampini 10569d0448ceSStefano Zampini /* Allocate total COO */ 10579d0448ceSStefano Zampini totnnz = cumnnz; 10589d0448ceSStefano Zampini PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 10599d0448ceSStefano Zampini PetscCall(PetscMalloc1(totnnz, &vals)); 10609d0448ceSStefano Zampini 10619d0448ceSStefano Zampini /* Handle rows and column maps 10629d0448ceSStefano Zampini We directly map rows and use an SF for the columns */ 10639d0448ceSStefano Zampini PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 10649d0448ceSStefano Zampini PetscCall(MatNestGetISs(A, rows, cols)); 10659d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 10669d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 
10679d0448ceSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 10685d955bbbSStefano Zampini else (void)maxnnz; 10699d0448ceSStefano Zampini 10709d0448ceSStefano Zampini cumnnz = 0; 10719d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 10729d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 10739d0448ceSStefano Zampini Mat sub = mats[r][c]; 10749d0448ceSStefano Zampini const PetscInt *ridx = rows_idx[r]; 10755d955bbbSStefano Zampini const PetscInt *cidx = cols_idx[c]; 10769d0448ceSStefano Zampini PetscInt rst; 10779d0448ceSStefano Zampini PetscSF csf; 10785d955bbbSStefano Zampini PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 10795d955bbbSStefano Zampini PetscLayout cmap; 10809d0448ceSStefano Zampini 10819d0448ceSStefano Zampini mumps->nest_vals_start[r * nc + c] = cumnnz; 10829d0448ceSStefano Zampini if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 10839d0448ceSStefano Zampini 10845d955bbbSStefano Zampini /* Extract inner blocks if needed */ 10855d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 10865d955bbbSStefano Zampini if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 10875d955bbbSStefano Zampini else { 10885d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 10895d955bbbSStefano Zampini if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 10905d955bbbSStefano Zampini } 10915d955bbbSStefano Zampini swap = (PetscBool)(isTrans || isHTrans); 10925d955bbbSStefano Zampini 10935d955bbbSStefano Zampini /* Get column layout to map off-process columns */ 10945d955bbbSStefano Zampini PetscCall(MatGetLayouts(sub, NULL, &cmap)); 10955d955bbbSStefano Zampini 10965d955bbbSStefano Zampini /* Get row start to map on-process rows */ 10975d955bbbSStefano Zampini PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 10985d955bbbSStefano Zampini 10999d0448ceSStefano 
Zampini /* Directly use the mumps datastructure and use C ordering for now */ 11009d0448ceSStefano Zampini PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 11019d0448ceSStefano Zampini 11025d955bbbSStefano Zampini /* Swap the role of rows and columns indices for transposed blocks 11035d955bbbSStefano Zampini since we need values with global final ordering */ 11045d955bbbSStefano Zampini if (swap) { 11055d955bbbSStefano Zampini cidx = rows_idx[r]; 11065d955bbbSStefano Zampini ridx = cols_idx[c]; 11079d0448ceSStefano Zampini } 11089d0448ceSStefano Zampini 11095d955bbbSStefano Zampini /* Communicate column indices 11105d955bbbSStefano Zampini This could have been done with a single SF but it would have complicated the code a lot. 11115d955bbbSStefano Zampini But since we do it only once, we pay the price of setting up an SF for each block */ 11125d955bbbSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) { 11135d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 11145d955bbbSStefano Zampini } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 11159d0448ceSStefano Zampini PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 11165d955bbbSStefano Zampini PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 11175d955bbbSStefano Zampini PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 11185d955bbbSStefano Zampini PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 11199d0448ceSStefano Zampini PetscCall(PetscSFDestroy(&csf)); 11209d0448ceSStefano Zampini 11215d955bbbSStefano Zampini /* Import indices: use direct map for rows and mapped indices for columns */ 11225d955bbbSStefano Zampini if (swap) { 11235d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) { 11245d955bbbSStefano Zampini 
PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 11255d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 11265d955bbbSStefano Zampini } 11275d955bbbSStefano Zampini } else { 11285d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) { 11295d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 11305d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 11315d955bbbSStefano Zampini } 11325d955bbbSStefano Zampini } 11335d955bbbSStefano Zampini 11345d955bbbSStefano Zampini /* Import values to full COO */ 11355d955bbbSStefano Zampini if (isHTrans) { /* conjugate the entries */ 11365d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 11375d955bbbSStefano Zampini } 11385d955bbbSStefano Zampini PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 11399d0448ceSStefano Zampini 11409d0448ceSStefano Zampini /* Shift new starting point and sanity check */ 11419d0448ceSStefano Zampini cumnnz += mumps->nnz; 11429d0448ceSStefano Zampini PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 11439d0448ceSStefano Zampini 11449d0448ceSStefano Zampini /* Free scratch memory */ 11459d0448ceSStefano Zampini PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 11469d0448ceSStefano Zampini PetscCall(PetscFree(mumps->val_alloc)); 11479d0448ceSStefano Zampini mumps->val = NULL; 11489d0448ceSStefano Zampini mumps->nnz = 0; 11499d0448ceSStefano Zampini } 11509d0448ceSStefano Zampini } 11519d0448ceSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 11529d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 11539d0448ceSStefano Zampini for (PetscInt c = 
0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 11549d0448ceSStefano Zampini PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 11559d0448ceSStefano Zampini if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 11565d955bbbSStefano Zampini mumps->nest_vals_start[nr * nc] = cumnnz; 11579d0448ceSStefano Zampini 11589d0448ceSStefano Zampini /* Set pointers for final MUMPS data structure */ 11599d0448ceSStefano Zampini mumps->nest_vals = vals; 11609d0448ceSStefano Zampini mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 11619d0448ceSStefano Zampini mumps->val = vals; 11629d0448ceSStefano Zampini mumps->irn = irns; 11639d0448ceSStefano Zampini mumps->jcn = jcns; 11649d0448ceSStefano Zampini mumps->nnz = cumnnz; 11659d0448ceSStefano Zampini } else { 11669d0448ceSStefano Zampini PetscScalar *oval = mumps->nest_vals; 11679d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 11689d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 11695d955bbbSStefano Zampini PetscBool isTrans, isHTrans = PETSC_FALSE; 11705d955bbbSStefano Zampini Mat sub = mats[r][c]; 11715d955bbbSStefano Zampini PetscInt midx = r * nc + c; 11725d955bbbSStefano Zampini 11735d955bbbSStefano Zampini if (!mumps->nest_convert_to_triples[midx]) continue; 11745d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 11755d955bbbSStefano Zampini if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 11765d955bbbSStefano Zampini else { 11775d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 11785d955bbbSStefano Zampini if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 11795d955bbbSStefano Zampini } 11805d955bbbSStefano Zampini mumps->val = oval + 
mumps->nest_vals_start[midx]; 11815d955bbbSStefano Zampini PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 11825d955bbbSStefano Zampini if (isHTrans) { 11835d955bbbSStefano Zampini PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 11845d955bbbSStefano Zampini for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 11855d955bbbSStefano Zampini } 11869d0448ceSStefano Zampini } 11879d0448ceSStefano Zampini } 11889d0448ceSStefano Zampini mumps->val = oval; 11899d0448ceSStefano Zampini } 11909d0448ceSStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 11919d0448ceSStefano Zampini } 11929d0448ceSStefano Zampini 1193d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_MUMPS(Mat A) 1194d71ae5a4SJacob Faibussowitsch { 1195a6053eceSJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1196b24902e0SBarry Smith 1197397b6df1SKris Buschelman PetscFunctionBegin; 11989566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 11999566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 12009566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 12019566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->b_seq)); 12029566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 12039566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.perm_in)); 12049566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 12059566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->val_alloc)); 12069566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->info)); 1207413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 1208413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 12099566063dSJacob Faibussowitsch PetscCall(MatMumpsResetSchur_Private(mumps)); 1210413bcc21SPierre Jolivet if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called 
for this instance */ 1211a5e57a09SHong Zhang mumps->id.job = JOB_END; 12123ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 121308401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1214413bcc21SPierre Jolivet if (mumps->mumps_comm != MPI_COMM_NULL) { 1215413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1216413bcc21SPierre Jolivet else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1217413bcc21SPierre Jolivet } 1218413bcc21SPierre Jolivet } 12193ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 122067602552SJunchao Zhang if (mumps->use_petsc_omp_support) { 12219566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 12229566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 12239566063dSJacob Faibussowitsch PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 122467602552SJunchao Zhang } 12253ab56b82SJunchao Zhang #endif 12269566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 12279566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 12289566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->recvcount)); 12299566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->reqs)); 12309566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->irhs_loc)); 12319d0448ceSStefano Zampini PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 12329d0448ceSStefano Zampini PetscCall(PetscFree(mumps->nest_vals)); 12339566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1234bf0cc555SLisandro Dalcin 123597969023SHong Zhang /* clear composed functions */ 12369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 
12379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 12389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 12399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 12409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 12419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 12429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 12439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 12449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 12459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 12469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 12475c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 12489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 12499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 12503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1251397b6df1SKris Buschelman } 1252397b6df1SKris Buschelman 125367602552SJunchao Zhang /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
*/ 1254d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1255d71ae5a4SJacob Faibussowitsch { 125667602552SJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 125767602552SJunchao Zhang const PetscMPIInt ompsize = mumps->omp_comm_size; 125867602552SJunchao Zhang PetscInt i, m, M, rstart; 125967602552SJunchao Zhang 126067602552SJunchao Zhang PetscFunctionBegin; 12619566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &M, NULL)); 12629566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &m, NULL)); 126308401ef6SPierre Jolivet PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 126467602552SJunchao Zhang if (ompsize == 1) { 126567602552SJunchao Zhang if (!mumps->irhs_loc) { 126667602552SJunchao Zhang mumps->nloc_rhs = m; 12679566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 12689566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 126967602552SJunchao Zhang for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 127067602552SJunchao Zhang } 127167602552SJunchao Zhang mumps->id.rhs_loc = (MumpsScalar *)array; 127267602552SJunchao Zhang } else { 127367602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 127467602552SJunchao Zhang const PetscInt *ranges; 127567602552SJunchao Zhang PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 127667602552SJunchao Zhang MPI_Group petsc_group, omp_group; 127767602552SJunchao Zhang PetscScalar *recvbuf = NULL; 127867602552SJunchao Zhang 127967602552SJunchao Zhang if (mumps->is_omp_master) { 128067602552SJunchao Zhang /* Lazily initialize the omp stuff for distributed rhs */ 128167602552SJunchao Zhang if (!mumps->irhs_loc) { 12829566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 12839566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(ompsize, 
&mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 12849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 12859566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 128667602552SJunchao Zhang for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 12879566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 128867602552SJunchao Zhang 128967602552SJunchao Zhang /* Populate mumps->irhs_loc[], rhs_nrow[] */ 129067602552SJunchao Zhang mumps->nloc_rhs = 0; 12919566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(A, &ranges)); 129267602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 129367602552SJunchao Zhang mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 129467602552SJunchao Zhang mumps->nloc_rhs += mumps->rhs_nrow[j]; 129567602552SJunchao Zhang } 12969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 129767602552SJunchao Zhang for (j = k = 0; j < ompsize; j++) { 129867602552SJunchao Zhang for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 129967602552SJunchao Zhang } 130067602552SJunchao Zhang 13019566063dSJacob Faibussowitsch PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 13029566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&petsc_group)); 13039566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&omp_group)); 130467602552SJunchao Zhang } 130567602552SJunchao Zhang 130667602552SJunchao Zhang /* Realloc buffers when current nrhs is bigger than what we have met */ 130767602552SJunchao Zhang if (nrhs > mumps->max_nrhs) { 13089566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 13099566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, 
&mumps->rhs_recvbuf)); 131067602552SJunchao Zhang mumps->max_nrhs = nrhs; 131167602552SJunchao Zhang } 131267602552SJunchao Zhang 131367602552SJunchao Zhang /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 13149566063dSJacob Faibussowitsch for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 131567602552SJunchao Zhang mumps->rhs_disps[0] = 0; 131667602552SJunchao Zhang for (j = 1; j < ompsize; j++) { 131767602552SJunchao Zhang mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 131808401ef6SPierre Jolivet PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 131967602552SJunchao Zhang } 132067602552SJunchao Zhang recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 132167602552SJunchao Zhang } 132267602552SJunchao Zhang 13239566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 13249566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 132567602552SJunchao Zhang 132667602552SJunchao Zhang if (mumps->is_omp_master) { 132767602552SJunchao Zhang if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 132867602552SJunchao Zhang PetscScalar *dst, *dstbase = mumps->rhs_loc; 132967602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 133067602552SJunchao Zhang const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 133167602552SJunchao Zhang dst = dstbase; 133267602552SJunchao Zhang for (i = 0; i < nrhs; i++) { 13339566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 133467602552SJunchao Zhang src += mumps->rhs_nrow[j]; 133567602552SJunchao Zhang dst += mumps->nloc_rhs; 
133667602552SJunchao Zhang } 133767602552SJunchao Zhang dstbase += mumps->rhs_nrow[j]; 133867602552SJunchao Zhang } 133967602552SJunchao Zhang } 134067602552SJunchao Zhang mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 134167602552SJunchao Zhang } 134267602552SJunchao Zhang #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 134367602552SJunchao Zhang } 134467602552SJunchao Zhang mumps->id.nrhs = nrhs; 134567602552SJunchao Zhang mumps->id.nloc_rhs = mumps->nloc_rhs; 134667602552SJunchao Zhang mumps->id.lrhs_loc = mumps->nloc_rhs; 134767602552SJunchao Zhang mumps->id.irhs_loc = mumps->irhs_loc; 13483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 134967602552SJunchao Zhang } 135067602552SJunchao Zhang 1351d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1352d71ae5a4SJacob Faibussowitsch { 1353e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 135425aac85cSJunchao Zhang const PetscScalar *rarray = NULL; 1355d54de34fSKris Buschelman PetscScalar *array; 1356329ec9b3SHong Zhang IS is_iden, is_petsc; 1357329ec9b3SHong Zhang PetscInt i; 1358cc86f929SStefano Zampini PetscBool second_solve = PETSC_FALSE; 1359883f2eb9SBarry Smith static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1360397b6df1SKris Buschelman 1361397b6df1SKris Buschelman PetscFunctionBegin; 13629371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 13639371c9d4SSatish Balay "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 13649371c9d4SSatish Balay &cite1)); 13659371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 13669371c9d4SSatish Balay "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 13679371c9d4SSatish Balay &cite2)); 13682aca8efcSHong Zhang 1369603e8f96SBarry Smith if (A->factorerrortype) { 13709566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 13719566063dSJacob Faibussowitsch PetscCall(VecSetInf(x)); 13723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13732aca8efcSHong Zhang } 13742aca8efcSHong Zhang 1375a5e57a09SHong Zhang mumps->id.nrhs = 1; 13762d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 137725aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 137867602552SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 13799566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(b, &rarray)); 13809566063dSJacob Faibussowitsch PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 138125aac85cSJunchao Zhang } else { 138241ffd417SStefano Zampini mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 13839566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 13849566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 138567602552SJunchao Zhang if (!mumps->myid) { 13869566063dSJacob Faibussowitsch PetscCall(VecGetArray(mumps->b_seq, &array)); 138767602552SJunchao Zhang mumps->id.rhs = (MumpsScalar *)array; 138867602552SJunchao Zhang } 138925aac85cSJunchao Zhang } 13903ab56b82SJunchao Zhang } else { /* petsc_size == 1 */ 139167602552SJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 13929566063dSJacob Faibussowitsch PetscCall(VecCopy(b, x)); 13939566063dSJacob Faibussowitsch 
PetscCall(VecGetArray(x, &array)); 1394940cd9d6SSatish Balay mumps->id.rhs = (MumpsScalar *)array; 1395397b6df1SKris Buschelman } 1396397b6df1SKris Buschelman 1397cc86f929SStefano Zampini /* 1398cc86f929SStefano Zampini handle condensation step of Schur complement (if any) 1399cc86f929SStefano Zampini We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1400cc86f929SStefano Zampini According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1401cc86f929SStefano Zampini Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1402cc86f929SStefano Zampini This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1403cc86f929SStefano Zampini */ 1404583f777eSStefano Zampini if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 140508401ef6SPierre Jolivet PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1406cc86f929SStefano Zampini second_solve = PETSC_TRUE; 14079566063dSJacob Faibussowitsch PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1408cc86f929SStefano Zampini } 1409397b6df1SKris Buschelman /* solve phase */ 1410a5e57a09SHong Zhang mumps->id.job = JOB_SOLVE; 14113ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 141208401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1413397b6df1SKris Buschelman 1414b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 14151baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1416b5fa320bSStefano Zampini 14172d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1418a5e57a09SHong Zhang if (mumps->scat_sol && mumps->ICNTL9_pre 
!= mumps->id.ICNTL(9)) { 1419a5e57a09SHong Zhang /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 14209566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1421397b6df1SKris Buschelman } 1422a5e57a09SHong Zhang if (!mumps->scat_sol) { /* create scatter scat_sol */ 1423a6053eceSJunchao Zhang PetscInt *isol2_loc = NULL; 14249566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 14259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1426a6053eceSJunchao Zhang for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 14279566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 14289566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 14299566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_iden)); 14309566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_petsc)); 1431a5e57a09SHong Zhang mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1432397b6df1SKris Buschelman } 1433a5e57a09SHong Zhang 14349566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 14359566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1436329ec9b3SHong Zhang } 1437353d7d71SJunchao Zhang 143867602552SJunchao Zhang if (mumps->petsc_size > 1) { 143925aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 14409566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(b, &rarray)); 144125aac85cSJunchao Zhang } else if (!mumps->myid) { 14429566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mumps->b_seq, &array)); 144325aac85cSJunchao Zhang } 
14449566063dSJacob Faibussowitsch } else PetscCall(VecRestoreArray(x, &array)); 1445353d7d71SJunchao Zhang 144664412097SPierre Jolivet PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 14473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1448397b6df1SKris Buschelman } 1449397b6df1SKris Buschelman 1450d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1451d71ae5a4SJacob Faibussowitsch { 1452e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1453338d3105SPierre Jolivet const PetscMUMPSInt value = mumps->id.ICNTL(9); 145451d5961aSHong Zhang 145551d5961aSHong Zhang PetscFunctionBegin; 1456a5e57a09SHong Zhang mumps->id.ICNTL(9) = 0; 14579566063dSJacob Faibussowitsch PetscCall(MatSolve_MUMPS(A, b, x)); 1458338d3105SPierre Jolivet mumps->id.ICNTL(9) = value; 14593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 146051d5961aSHong Zhang } 146151d5961aSHong Zhang 1462d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1463d71ae5a4SJacob Faibussowitsch { 1464b8491c3eSStefano Zampini Mat Bt = NULL; 1465a6053eceSJunchao Zhang PetscBool denseX, denseB, flg, flgT; 1466e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1467334c5f61SHong Zhang PetscInt i, nrhs, M; 14681683a169SBarry Smith PetscScalar *array; 14691683a169SBarry Smith const PetscScalar *rbray; 1470a6053eceSJunchao Zhang PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1471a6053eceSJunchao Zhang PetscMUMPSInt *isol_loc, *isol_loc_save; 14721683a169SBarry Smith PetscScalar *bray, *sol_loc, *sol_loc_save; 1473be818407SHong Zhang IS is_to, is_from; 1474beae5ec0SHong Zhang PetscInt k, proc, j, m, myrstart; 1475be818407SHong Zhang const PetscInt *rstart; 147667602552SJunchao Zhang Vec v_mpi, msol_loc; 147767602552SJunchao Zhang VecScatter scat_sol; 147867602552SJunchao Zhang Vec b_seq; 147967602552SJunchao Zhang 
VecScatter scat_rhs; 1480be818407SHong Zhang PetscScalar *aa; 1481be818407SHong Zhang PetscInt spnr, *ia, *ja; 1482d56c302dSHong Zhang Mat_MPIAIJ *b = NULL; 1483bda8bf91SBarry Smith 1484e0b74bf9SHong Zhang PetscFunctionBegin; 14859566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 148628b400f6SJacob Faibussowitsch PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1487be818407SHong Zhang 14889566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1489a6053eceSJunchao Zhang if (denseB) { 149008401ef6SPierre Jolivet PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1491be818407SHong Zhang mumps->id.ICNTL(20) = 0; /* dense RHS */ 14920e6b8875SHong Zhang } else { /* sparse B */ 149308401ef6SPierre Jolivet PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1494013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1495da81f932SPierre Jolivet if (flgT) { /* input B is transpose of actual RHS matrix, 14960e6b8875SHong Zhang because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 14979566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(B, &Bt)); 1498013e2dc7SBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1499be818407SHong Zhang mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1500b8491c3eSStefano Zampini } 150187b22cf4SHong Zhang 15029566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &M, &nrhs)); 15039481e6e9SHong Zhang mumps->id.nrhs = nrhs; 15049481e6e9SHong Zhang mumps->id.lrhs = M; 15052b691707SHong Zhang mumps->id.rhs = NULL; 15069481e6e9SHong Zhang 15072d4298aeSJunchao Zhang if (mumps->petsc_size == 1) { 1508b8491c3eSStefano Zampini PetscScalar *aa; 1509b8491c3eSStefano Zampini PetscInt spnr, *ia, *ja; 1510e94cce23SStefano Zampini PetscBool second_solve = PETSC_FALSE; 1511b8491c3eSStefano Zampini 15129566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 1513b8491c3eSStefano Zampini mumps->id.rhs = (MumpsScalar *)array; 15142b691707SHong Zhang 1515a6053eceSJunchao Zhang if (denseB) { 15162b691707SHong Zhang /* copy B to X */ 15179566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &rbray)); 15189566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 15199566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 15202b691707SHong Zhang } else { /* sparse B */ 15219566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Bt, &aa)); 15229566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 152328b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 15249566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1525b8491c3eSStefano Zampini mumps->id.rhs_sparse = (MumpsScalar *)aa; 1526b8491c3eSStefano Zampini } 
1527e94cce23SStefano Zampini /* handle condensation step of Schur complement (if any) */ 1528583f777eSStefano Zampini if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1529e94cce23SStefano Zampini second_solve = PETSC_TRUE; 15309566063dSJacob Faibussowitsch PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1531e94cce23SStefano Zampini } 15322cd7d884SHong Zhang /* solve phase */ 15332cd7d884SHong Zhang mumps->id.job = JOB_SOLVE; 15343ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 153508401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1536b5fa320bSStefano Zampini 1537b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 15381baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1539a6053eceSJunchao Zhang if (!denseB) { /* sparse B */ 15409566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 15419566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 154228b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1543b8491c3eSStefano Zampini } 15449566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 15453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1546be818407SHong Zhang } 1547801fbe65SHong Zhang 15482ef1f0ffSBarry Smith /* parallel case: MUMPS requires rhs B to be centralized on the host! 
*/ 1549aed4548fSBarry Smith PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1550241dbb5eSStefano Zampini 1551beae5ec0SHong Zhang /* create msol_loc to hold mumps local solution */ 15521683a169SBarry Smith isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 15531683a169SBarry Smith sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1554801fbe65SHong Zhang 1555a1dfcbd9SJunchao Zhang lsol_loc = mumps->id.lsol_loc; 155671aed81dSHong Zhang nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 15579566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1558940cd9d6SSatish Balay mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1559801fbe65SHong Zhang mumps->id.isol_loc = isol_loc; 1560801fbe65SHong Zhang 15619566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 15622cd7d884SHong Zhang 156367602552SJunchao Zhang if (denseB) { 156425aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 156567602552SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 15669566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &rbray)); 15679566063dSJacob Faibussowitsch PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 15689566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 15699566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 15709566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 157125aac85cSJunchao Zhang } else { 157225aac85cSJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 157380577c12SJunchao Zhang /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 157480577c12SJunchao Zhang very inefficient communication. 
An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 157580577c12SJunchao Zhang 0, re-arrange B into desired order, which is a local operation. 157680577c12SJunchao Zhang */ 157780577c12SJunchao Zhang 157867602552SJunchao Zhang /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1579be818407SHong Zhang /* wrap dense rhs matrix B into a vector v_mpi */ 15809566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 15819566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(B, &bray)); 15829566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 15839566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(B, &bray)); 15842b691707SHong Zhang 1585be818407SHong Zhang /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1586801fbe65SHong Zhang if (!mumps->myid) { 1587beae5ec0SHong Zhang PetscInt *idx; 1588beae5ec0SHong Zhang /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 15899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrhs * M, &idx)); 15909566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(B, &rstart)); 1591be818407SHong Zhang k = 0; 15922d4298aeSJunchao Zhang for (proc = 0; proc < mumps->petsc_size; proc++) { 1593be818407SHong Zhang for (j = 0; j < nrhs; j++) { 1594beae5ec0SHong Zhang for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1595be818407SHong Zhang } 1596be818407SHong Zhang } 1597be818407SHong Zhang 15989566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 15999566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 16009566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1601801fbe65SHong Zhang } else { 16029566063dSJacob Faibussowitsch 
PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 16039566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 16049566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1605801fbe65SHong Zhang } 16069566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 16079566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 16089566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 16099566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 16109566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1611801fbe65SHong Zhang 1612801fbe65SHong Zhang if (!mumps->myid) { /* define rhs on the host */ 16139566063dSJacob Faibussowitsch PetscCall(VecGetArray(b_seq, &bray)); 1614940cd9d6SSatish Balay mumps->id.rhs = (MumpsScalar *)bray; 16159566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b_seq, &bray)); 1616801fbe65SHong Zhang } 161725aac85cSJunchao Zhang } 16182b691707SHong Zhang } else { /* sparse B */ 16192b691707SHong Zhang b = (Mat_MPIAIJ *)Bt->data; 16202b691707SHong Zhang 1621be818407SHong Zhang /* wrap dense X into a vector v_mpi */ 16229566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(X, &m, NULL)); 16239566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &bray)); 16249566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 16259566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &bray)); 16262b691707SHong Zhang 16272b691707SHong Zhang if (!mumps->myid) { 16289566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(b->A, &aa)); 16299566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 163028b400f6SJacob 
Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 16319566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 16322b691707SHong Zhang mumps->id.rhs_sparse = (MumpsScalar *)aa; 16332b691707SHong Zhang } else { 16342b691707SHong Zhang mumps->id.irhs_ptr = NULL; 16352b691707SHong Zhang mumps->id.irhs_sparse = NULL; 16362b691707SHong Zhang mumps->id.nz_rhs = 0; 16372b691707SHong Zhang mumps->id.rhs_sparse = NULL; 16382b691707SHong Zhang } 16392b691707SHong Zhang } 16402b691707SHong Zhang 1641801fbe65SHong Zhang /* solve phase */ 1642801fbe65SHong Zhang mumps->id.job = JOB_SOLVE; 16433ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 164408401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1645801fbe65SHong Zhang 1646334c5f61SHong Zhang /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 16479566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 16489566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(v_mpi, array)); 1649801fbe65SHong Zhang 1650334c5f61SHong Zhang /* create scatter scat_sol */ 16519566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(X, &rstart)); 1652beae5ec0SHong Zhang /* iidx: index for scatter mumps solution to petsc X */ 1653beae5ec0SHong Zhang 16549566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 16559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1656beae5ec0SHong Zhang for (i = 0; i < lsol_loc; i++) { 1657beae5ec0SHong Zhang isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1658beae5ec0SHong Zhang 16592d4298aeSJunchao Zhang for (proc = 0; proc < mumps->petsc_size; proc++) { 1660beae5ec0SHong Zhang if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1661beae5ec0SHong Zhang myrstart = rstart[proc]; 1662beae5ec0SHong Zhang k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1663beae5ec0SHong Zhang iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1664beae5ec0SHong Zhang m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1665beae5ec0SHong Zhang break; 1666be818407SHong Zhang } 1667be818407SHong Zhang } 1668be818407SHong Zhang 1669beae5ec0SHong Zhang for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1670801fbe65SHong Zhang } 16719566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 16729566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 16739566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 16749566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 16759566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 16769566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 16779566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 167871aed81dSHong Zhang 167971aed81dSHong Zhang /* free spaces */ 16801683a169SBarry Smith mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 168171aed81dSHong Zhang mumps->id.isol_loc = isol_loc_save; 168271aed81dSHong Zhang 16839566063dSJacob Faibussowitsch PetscCall(PetscFree2(sol_loc, isol_loc)); 16849566063dSJacob Faibussowitsch PetscCall(PetscFree(idxx)); 16859566063dSJacob Faibussowitsch PetscCall(VecDestroy(&msol_loc)); 16869566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v_mpi)); 
1687a6053eceSJunchao Zhang if (!denseB) { 16882b691707SHong Zhang if (!mumps->myid) { 1689d56c302dSHong Zhang b = (Mat_MPIAIJ *)Bt->data; 16909566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 16919566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 169228b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 16932b691707SHong Zhang } 16942b691707SHong Zhang } else { 169525aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { 16969566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b_seq)); 16979566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_rhs)); 169825aac85cSJunchao Zhang } 16992b691707SHong Zhang } 17009566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_sol)); 170164412097SPierre Jolivet PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 17023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1703e0b74bf9SHong Zhang } 1704e0b74bf9SHong Zhang 1705d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1706d71ae5a4SJacob Faibussowitsch { 1707b18964edSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1708338d3105SPierre Jolivet const PetscMUMPSInt value = mumps->id.ICNTL(9); 1709b18964edSHong Zhang 1710b18964edSHong Zhang PetscFunctionBegin; 1711b18964edSHong Zhang mumps->id.ICNTL(9) = 0; 1712b18964edSHong Zhang PetscCall(MatMatSolve_MUMPS(A, B, X)); 1713338d3105SPierre Jolivet mumps->id.ICNTL(9) = value; 17143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1715b18964edSHong Zhang } 1716b18964edSHong Zhang 1717d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1718d71ae5a4SJacob Faibussowitsch { 1719eb3ef3b2SHong Zhang PetscBool flg; 1720eb3ef3b2SHong Zhang 
Mat B; 1721eb3ef3b2SHong Zhang 1722eb3ef3b2SHong Zhang PetscFunctionBegin; 17239566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 172428b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1725eb3ef3b2SHong Zhang 1726eb3ef3b2SHong Zhang /* Create B=Bt^T that uses Bt's data structure */ 17279566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(Bt, &B)); 1728eb3ef3b2SHong Zhang 17299566063dSJacob Faibussowitsch PetscCall(MatMatSolve_MUMPS(A, B, X)); 17309566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 17313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1732eb3ef3b2SHong Zhang } 1733eb3ef3b2SHong Zhang 1734ace3df97SHong Zhang #if !defined(PETSC_USE_COMPLEX) 1735a58c3f20SHong Zhang /* 1736a58c3f20SHong Zhang input: 1737a58c3f20SHong Zhang F: numeric factor 1738a58c3f20SHong Zhang output: 1739a58c3f20SHong Zhang nneg: total number of negative pivots 174019d49a3bSHong Zhang nzero: total number of zero pivots 174119d49a3bSHong Zhang npos: (global dimension of F) - nneg - nzero 1742a58c3f20SHong Zhang */ 1743d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1744d71ae5a4SJacob Faibussowitsch { 1745e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1746c1490034SHong Zhang PetscMPIInt size; 1747a58c3f20SHong Zhang 1748a58c3f20SHong Zhang PetscFunctionBegin; 17499566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1750bcb30aebSHong Zhang /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1751aed4548fSBarry Smith PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1752ed85ac9fSHong Zhang 1753710ac8efSHong Zhang if (nneg) *nneg = mumps->id.INFOG(12); 1754ed85ac9fSHong Zhang if (nzero || npos) { 175508401ef6SPierre Jolivet PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1756710ac8efSHong Zhang if (nzero) *nzero = mumps->id.INFOG(28); 1757710ac8efSHong Zhang if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1758a58c3f20SHong Zhang } 17593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1760a58c3f20SHong Zhang } 176119d49a3bSHong Zhang #endif 1762a58c3f20SHong Zhang 1763d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1764d71ae5a4SJacob Faibussowitsch { 1765a6053eceSJunchao Zhang PetscInt i, nreqs; 1766a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; 1767a6053eceSJunchao Zhang PetscMPIInt count; 1768a6053eceSJunchao Zhang PetscInt64 totnnz, remain; 1769a6053eceSJunchao Zhang const PetscInt osize = mumps->omp_comm_size; 1770a6053eceSJunchao Zhang PetscScalar *val; 17713ab56b82SJunchao Zhang 17723ab56b82SJunchao Zhang PetscFunctionBegin; 1773a6053eceSJunchao Zhang if (osize > 1) { 17743ab56b82SJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) { 17753ab56b82SJunchao Zhang /* master first gathers counts of nonzeros to receive */ 17769566063dSJacob Faibussowitsch if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 17779566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 17783ab56b82SJunchao Zhang 1779a6053eceSJunchao Zhang /* Then each computes number of send/recvs */ 17803ab56b82SJunchao Zhang if (mumps->is_omp_master) { 1781a6053eceSJunchao Zhang /* Start from 1 since self communication is not done in MPI */ 
1782a6053eceSJunchao Zhang nreqs = 0; 1783a6053eceSJunchao Zhang for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1784a6053eceSJunchao Zhang } else { 1785a6053eceSJunchao Zhang nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 17863ab56b82SJunchao Zhang } 178735cb6cd3SPierre Jolivet PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 17883ab56b82SJunchao Zhang 1789a6053eceSJunchao Zhang /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1790a6053eceSJunchao Zhang MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1791a6053eceSJunchao Zhang might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1792a6053eceSJunchao Zhang is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1793a6053eceSJunchao Zhang */ 1794a6053eceSJunchao Zhang nreqs = 0; /* counter for actual send/recvs */ 17953ab56b82SJunchao Zhang if (mumps->is_omp_master) { 1796a6053eceSJunchao Zhang for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 17979566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 17989566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(totnnz, &val)); 1799a6053eceSJunchao Zhang 1800a6053eceSJunchao Zhang /* Self communication */ 18019566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 18029566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 18039566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1804a6053eceSJunchao Zhang 1805a6053eceSJunchao Zhang /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 18069566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->irn, 
mumps->jcn)); 18079566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->val_alloc)); 1808a6053eceSJunchao Zhang mumps->nnz = totnnz; 18093ab56b82SJunchao Zhang mumps->irn = irn; 18103ab56b82SJunchao Zhang mumps->jcn = jcn; 1811a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 1812a6053eceSJunchao Zhang 1813a6053eceSJunchao Zhang irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1814a6053eceSJunchao Zhang jcn += mumps->recvcount[0]; 1815a6053eceSJunchao Zhang val += mumps->recvcount[0]; 1816a6053eceSJunchao Zhang 1817a6053eceSJunchao Zhang /* Remote communication */ 1818a6053eceSJunchao Zhang for (i = 1; i < osize; i++) { 1819a6053eceSJunchao Zhang count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1820a6053eceSJunchao Zhang remain = mumps->recvcount[i] - count; 1821a6053eceSJunchao Zhang while (count > 0) { 18229566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 18239566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 18249566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1825a6053eceSJunchao Zhang irn += count; 1826a6053eceSJunchao Zhang jcn += count; 1827a6053eceSJunchao Zhang val += count; 1828a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1829a6053eceSJunchao Zhang remain -= count; 1830a6053eceSJunchao Zhang } 18313ab56b82SJunchao Zhang } 18323ab56b82SJunchao Zhang } else { 1833a6053eceSJunchao Zhang irn = mumps->irn; 1834a6053eceSJunchao Zhang jcn = mumps->jcn; 1835a6053eceSJunchao Zhang val = mumps->val; 1836a6053eceSJunchao Zhang count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1837a6053eceSJunchao Zhang remain = mumps->nnz - count; 1838a6053eceSJunchao Zhang while (count > 0) { 18399566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(irn, 
count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 18409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 18419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1842a6053eceSJunchao Zhang irn += count; 1843a6053eceSJunchao Zhang jcn += count; 1844a6053eceSJunchao Zhang val += count; 1845a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1846a6053eceSJunchao Zhang remain -= count; 18473ab56b82SJunchao Zhang } 18483ab56b82SJunchao Zhang } 1849a6053eceSJunchao Zhang } else { 1850a6053eceSJunchao Zhang nreqs = 0; 1851a6053eceSJunchao Zhang if (mumps->is_omp_master) { 1852a6053eceSJunchao Zhang val = mumps->val + mumps->recvcount[0]; 1853a6053eceSJunchao Zhang for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1854a6053eceSJunchao Zhang count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1855a6053eceSJunchao Zhang remain = mumps->recvcount[i] - count; 1856a6053eceSJunchao Zhang while (count > 0) { 18579566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1858a6053eceSJunchao Zhang val += count; 1859a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1860a6053eceSJunchao Zhang remain -= count; 1861a6053eceSJunchao Zhang } 1862a6053eceSJunchao Zhang } 1863a6053eceSJunchao Zhang } else { 1864a6053eceSJunchao Zhang val = mumps->val; 1865a6053eceSJunchao Zhang count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1866a6053eceSJunchao Zhang remain = mumps->nnz - count; 1867a6053eceSJunchao Zhang while (count > 0) { 18689566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1869a6053eceSJunchao Zhang val += count; 1870a6053eceSJunchao Zhang 
count = PetscMin(remain, PETSC_MPI_INT_MAX); 1871a6053eceSJunchao Zhang remain -= count; 1872a6053eceSJunchao Zhang } 1873a6053eceSJunchao Zhang } 1874a6053eceSJunchao Zhang } 18759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1876a6053eceSJunchao Zhang mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1877a6053eceSJunchao Zhang } 18783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18793ab56b82SJunchao Zhang } 18803ab56b82SJunchao Zhang 1881d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info) 1882d71ae5a4SJacob Faibussowitsch { 1883e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1884ace3abfcSBarry Smith PetscBool isMPIAIJ; 1885397b6df1SKris Buschelman 1886397b6df1SKris Buschelman PetscFunctionBegin; 1887dbf6bb8dSprj- if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 188848a46eb9SPierre Jolivet if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 18899566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 18903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18912aca8efcSHong Zhang } 18926baea169SHong Zhang 18939566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 18949566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1895397b6df1SKris Buschelman 1896397b6df1SKris Buschelman /* numerical factorization phase */ 1897a5e57a09SHong Zhang mumps->id.job = JOB_FACTNUMERIC; 18984e34a73bSHong Zhang if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1899ad540459SPierre Jolivet if (!mumps->myid) mumps->id.a = 
(MumpsScalar *)mumps->val; 1900397b6df1SKris Buschelman } else { 1901940cd9d6SSatish Balay mumps->id.a_loc = (MumpsScalar *)mumps->val; 1902397b6df1SKris Buschelman } 19033ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 1904a5e57a09SHong Zhang if (mumps->id.INFOG(1) < 0) { 19057a46b595SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 1906c0d63f2fSHong Zhang if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 19079566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1908603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1909c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -13) { 19109566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1911603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1912c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 19139566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1914603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 19152aca8efcSHong Zhang } else { 19169566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1917603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 1918151787a6SHong Zhang } 19192aca8efcSHong Zhang } 1920aed4548fSBarry Smith PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " 
mumps->id.ICNTL(16):=%d", mumps->id.INFOG(16)); 1921397b6df1SKris Buschelman 1922b3cb21ddSStefano Zampini F->assembled = PETSC_TRUE; 1923d47f36abSHong Zhang 1924b3cb21ddSStefano Zampini if (F->schur) { /* reset Schur status to unfactored */ 19253cb7dd0eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 1926c70f7ee4SJunchao Zhang F->schur->offloadmask = PETSC_OFFLOAD_CPU; 19273cb7dd0eSStefano Zampini #endif 1928b3cb21ddSStefano Zampini if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1929b3cb21ddSStefano Zampini mumps->id.ICNTL(19) = 2; 19309566063dSJacob Faibussowitsch PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1931b3cb21ddSStefano Zampini } 19329566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1933b3cb21ddSStefano Zampini } 193467877ebaSShri Abhyankar 1935066565c5SStefano Zampini /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1936066565c5SStefano Zampini if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1937066565c5SStefano Zampini 19383ab56b82SJunchao Zhang if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 19392d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 194067877ebaSShri Abhyankar PetscInt lsol_loc; 194167877ebaSShri Abhyankar PetscScalar *sol_loc; 19422205254eSKarl Rupp 19439566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 1944c2093ab7SHong Zhang 1945c2093ab7SHong Zhang /* distributed solution; Create x_seq=sol_loc for repeated use */ 1946c2093ab7SHong Zhang if (mumps->x_seq) { 19479566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 19489566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 19499566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 1950c2093ab7SHong Zhang } 1951a5e57a09SHong Zhang lsol_loc = mumps->id.INFO(23); /* length of 
sol_loc */ 19529566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1953a5e57a09SHong Zhang mumps->id.lsol_loc = lsol_loc; 1954940cd9d6SSatish Balay mumps->id.sol_loc = (MumpsScalar *)sol_loc; 19559566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 195667877ebaSShri Abhyankar } 19579566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 19583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1959397b6df1SKris Buschelman } 1960397b6df1SKris Buschelman 19619a2535b5SHong Zhang /* Sets MUMPS options from the options database */ 1962d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1963d71ae5a4SJacob Faibussowitsch { 1964e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1965413bcc21SPierre Jolivet PetscMUMPSInt icntl = 0, size, *listvar_schur; 196645e3843bSPierre Jolivet PetscInt info[80], i, ninfo = 80, rbs, cbs; 1967413bcc21SPierre Jolivet PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1968413bcc21SPierre Jolivet MumpsScalar *arr; 1969dcd589f8SShri Abhyankar 1970dcd589f8SShri Abhyankar PetscFunctionBegin; 197126cc229bSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1972413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1973413bcc21SPierre Jolivet PetscInt nthreads = 0; 1974413bcc21SPierre Jolivet PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1975413bcc21SPierre Jolivet PetscInt nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 1976413bcc21SPierre Jolivet 1977413bcc21SPierre Jolivet mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1978413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1979413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1980413bcc21SPierre Jolivet 1981413bcc21SPierre Jolivet PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1982413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1983413bcc21SPierre Jolivet /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1984413bcc21SPierre Jolivet PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1985413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) { 19869371c9d4SSatish Balay PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 19879371c9d4SSatish Balay ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1988413bcc21SPierre Jolivet PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 1989413bcc21SPierre Jolivet #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1990413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1991413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1992413bcc21SPierre Jolivet #endif 1993413bcc21SPierre Jolivet } else { 1994413bcc21SPierre Jolivet mumps->omp_comm = PETSC_COMM_SELF; 1995413bcc21SPierre Jolivet mumps->mumps_comm = mumps->petsc_comm; 1996413bcc21SPierre Jolivet mumps->is_omp_master = PETSC_TRUE; 1997413bcc21SPierre Jolivet } 1998413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1999413bcc21SPierre Jolivet mumps->reqs = NULL; 2000413bcc21SPierre Jolivet mumps->tag = 0; 2001413bcc21SPierre Jolivet 2002413bcc21SPierre Jolivet if (mumps->mumps_comm != MPI_COMM_NULL) { 2003413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2004413bcc21SPierre Jolivet /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 2005413bcc21SPierre Jolivet MPI_Comm comm; 2006413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2007413bcc21SPierre Jolivet mumps->mumps_comm = comm; 2008413bcc21SPierre Jolivet } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2009413bcc21SPierre Jolivet } 2010413bcc21SPierre Jolivet 2011413bcc21SPierre Jolivet mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2012413bcc21SPierre Jolivet mumps->id.job = JOB_INIT; 2013413bcc21SPierre Jolivet mumps->id.par = 1; /* host participates factorizaton and solve */ 2014413bcc21SPierre Jolivet mumps->id.sym = mumps->sym; 2015413bcc21SPierre Jolivet 2016413bcc21SPierre Jolivet size = mumps->id.size_schur; 2017413bcc21SPierre Jolivet arr = mumps->id.schur; 2018413bcc21SPierre Jolivet listvar_schur = mumps->id.listvar_schur; 2019413bcc21SPierre Jolivet PetscMUMPS_c(mumps); 2020413bcc21SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 2021413bcc21SPierre Jolivet /* restore cached ICNTL and CNTL values */ 2022413bcc21SPierre Jolivet for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2023413bcc21SPierre Jolivet for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2024413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 2025413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 2026413bcc21SPierre Jolivet 2027413bcc21SPierre Jolivet if (schur) { 2028413bcc21SPierre Jolivet mumps->id.size_schur = size; 2029413bcc21SPierre Jolivet mumps->id.schur_lld = size; 2030413bcc21SPierre Jolivet mumps->id.schur = arr; 2031413bcc21SPierre Jolivet mumps->id.listvar_schur = listvar_schur; 2032413bcc21SPierre Jolivet if (mumps->petsc_size > 1) { 2033413bcc21SPierre Jolivet PetscBool gs; /* gs is false if any rank 
other than root has non-empty IS */ 2034413bcc21SPierre Jolivet 2035413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2036413bcc21SPierre Jolivet gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2037712fec58SPierre Jolivet PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2038413bcc21SPierre Jolivet PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2039413bcc21SPierre Jolivet } else { 2040413bcc21SPierre Jolivet if (F->factortype == MAT_FACTOR_LU) { 2041413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2042413bcc21SPierre Jolivet } else { 2043413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2044413bcc21SPierre Jolivet } 2045413bcc21SPierre Jolivet } 2046413bcc21SPierre Jolivet mumps->id.ICNTL(26) = -1; 2047413bcc21SPierre Jolivet } 2048413bcc21SPierre Jolivet 2049413bcc21SPierre Jolivet /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2050413bcc21SPierre Jolivet For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
2051413bcc21SPierre Jolivet */ 2052413bcc21SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2053413bcc21SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2054413bcc21SPierre Jolivet 2055413bcc21SPierre Jolivet mumps->scat_rhs = NULL; 2056413bcc21SPierre Jolivet mumps->scat_sol = NULL; 2057413bcc21SPierre Jolivet 2058413bcc21SPierre Jolivet /* set PETSc-MUMPS default options - override MUMPS default */ 2059413bcc21SPierre Jolivet mumps->id.ICNTL(3) = 0; 2060413bcc21SPierre Jolivet mumps->id.ICNTL(4) = 0; 2061413bcc21SPierre Jolivet if (mumps->petsc_size == 1) { 2062413bcc21SPierre Jolivet mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2063413bcc21SPierre Jolivet mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2064413bcc21SPierre Jolivet } else { 2065413bcc21SPierre Jolivet mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2066413bcc21SPierre Jolivet mumps->id.ICNTL(21) = 1; /* distributed solution */ 2067413bcc21SPierre Jolivet } 2068413bcc21SPierre Jolivet } 20699566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 20709a2535b5SHong Zhang if (flg) mumps->id.ICNTL(1) = icntl; 20719566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 20729a2535b5SHong Zhang if (flg) mumps->id.ICNTL(2) = icntl; 20739566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 20749a2535b5SHong Zhang if (flg) mumps->id.ICNTL(3) = icntl; 2075dcd589f8SShri Abhyankar 20769566063dSJacob Faibussowitsch 
PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 20779a2535b5SHong Zhang if (flg) mumps->id.ICNTL(4) = icntl; 20789a2535b5SHong Zhang if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 20799a2535b5SHong Zhang 20809566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 20819a2535b5SHong Zhang if (flg) mumps->id.ICNTL(6) = icntl; 20829a2535b5SHong Zhang 20839566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2084dcd589f8SShri Abhyankar if (flg) { 2085aed4548fSBarry Smith PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2086b53c1a7fSBarry Smith mumps->id.ICNTL(7) = icntl; 2087dcd589f8SShri Abhyankar } 2088e0b74bf9SHong Zhang 20899566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 20909566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 20919566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 20929566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an 
error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 20939566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 20949566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 20959566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 209645e3843bSPierre Jolivet PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 209745e3843bSPierre Jolivet if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 209845e3843bSPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 209945e3843bSPierre Jolivet if (flg) { 210045e3843bSPierre Jolivet PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 210145e3843bSPierre Jolivet PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 210245e3843bSPierre Jolivet } 21039566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 210459ac8732SStefano Zampini if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 21059566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F->schur)); 21069566063dSJacob Faibussowitsch 
PetscCall(MatMumpsResetSchur_Private(mumps)); 210759ac8732SStefano Zampini } 210825aac85cSJunchao Zhang 210943f3b051SJunchao Zhang /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 211043f3b051SJunchao Zhang and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 211125aac85cSJunchao Zhang and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 211243f3b051SJunchao Zhang This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 211343f3b051SJunchao Zhang see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 211443f3b051SJunchao Zhang In short, we could not use distributed RHS with MPICH until v4.0b1. 211525aac85cSJunchao Zhang */ 211643f3b051SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 211725aac85cSJunchao Zhang mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 211843f3b051SJunchao Zhang #else 211943f3b051SJunchao Zhang mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 212025aac85cSJunchao Zhang #endif 21219566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2122aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10", (int)mumps->ICNTL20); 212325aac85cSJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2124aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 212525aac85cSJunchao Zhang #endif 21269566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 21279a2535b5SHong Zhang 21289566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 21299566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 21309566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 21319371c9d4SSatish Balay if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2132d7ebd59bSHong Zhang 21339566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 21349566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 21359566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple 
right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 21369566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 21379566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 21389566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 21399566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 21409566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 21419566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 21429566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 21439566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 21449566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated 
compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2145*146931dbSPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2146dcd589f8SShri Abhyankar 21479566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 21489566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 21499566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 21509566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 21519566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 21529566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2153e5bb22a1SHong Zhang 21549566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2155b34f08ffSHong Zhang 21569566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2157b34f08ffSHong Zhang if (ninfo) { 215808401ef6SPierre Jolivet PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 21599566063dSJacob 
Faibussowitsch PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2160b34f08ffSHong Zhang mumps->ninfo = ninfo; 2161b34f08ffSHong Zhang for (i = 0; i < ninfo; i++) { 2162aed4548fSBarry Smith PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2163f7d195e4SLawrence Mitchell mumps->info[i] = info[i]; 2164b34f08ffSHong Zhang } 2165b34f08ffSHong Zhang } 2166d0609cedSBarry Smith PetscOptionsEnd(); 21673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2168dcd589f8SShri Abhyankar } 2169dcd589f8SShri Abhyankar 2170d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2171d71ae5a4SJacob Faibussowitsch { 21725cd7cf9dSHong Zhang PetscFunctionBegin; 21735cd7cf9dSHong Zhang if (mumps->id.INFOG(1) < 0) { 21747a46b595SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1)); 21755cd7cf9dSHong Zhang if (mumps->id.INFOG(1) == -6) { 21769566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2177603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 21785cd7cf9dSHong Zhang } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 21799566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2180603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2181dbf6bb8dSprj- } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 21829566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "Empty matrix\n")); 21835cd7cf9dSHong Zhang } else { 21849566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), 
mumps->id.INFO(2))); 2185603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 21865cd7cf9dSHong Zhang } 21875cd7cf9dSHong Zhang } 21883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 21895cd7cf9dSHong Zhang } 21905cd7cf9dSHong Zhang 2191d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2192d71ae5a4SJacob Faibussowitsch { 2193e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 219467877ebaSShri Abhyankar Vec b; 219567877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2196397b6df1SKris Buschelman 2197397b6df1SKris Buschelman PetscFunctionBegin; 2198d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2199d47f36abSHong Zhang /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 22003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2201d47f36abSHong Zhang } 2202dcd589f8SShri Abhyankar 22039a2535b5SHong Zhang /* Set MUMPS options from the options database */ 220426cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2205dcd589f8SShri Abhyankar 22069566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 22079566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2208dcd589f8SShri Abhyankar 220967877ebaSShri Abhyankar /* analysis phase */ 2210a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2211a5e57a09SHong Zhang mumps->id.n = M; 2212a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 221367877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2214a5e57a09SHong Zhang if (!mumps->myid) { 2215a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2216a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2217a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2218a6053eceSJunchao Zhang if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 22194ac6704cSBarry Smith if (r) { 22204ac6704cSBarry 
Smith mumps->id.ICNTL(7) = 1; 2221a5e57a09SHong Zhang if (!mumps->myid) { 2222e0b74bf9SHong Zhang const PetscInt *idx; 2223a6053eceSJunchao Zhang PetscInt i; 22242205254eSKarl Rupp 22259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 22269566063dSJacob Faibussowitsch PetscCall(ISGetIndices(r, &idx)); 22279566063dSJacob Faibussowitsch for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 22289566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(r, &idx)); 2229e0b74bf9SHong Zhang } 2230e0b74bf9SHong Zhang } 223167877ebaSShri Abhyankar } 223267877ebaSShri Abhyankar break; 223367877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2234a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2235a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2236a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2237a6053eceSJunchao Zhang if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 223825aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 22399566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 22409566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 22419566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 224225aac85cSJunchao Zhang } 224367877ebaSShri Abhyankar break; 224467877ebaSShri Abhyankar } 22453ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 22469566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 224767877ebaSShri Abhyankar 2248719d5645SBarry Smith F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2249dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 225051d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 22514e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 2252eb3ef3b2SHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2253b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2254d47f36abSHong Zhang 2255d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 22563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2257b24902e0SBarry Smith } 2258b24902e0SBarry Smith 2259450b117fSShri Abhyankar /* Note the Petsc r and c permutations are ignored */ 2260d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2261d71ae5a4SJacob Faibussowitsch { 2262e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 226367877ebaSShri Abhyankar Vec b; 226467877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2265450b117fSShri Abhyankar 2266450b117fSShri Abhyankar PetscFunctionBegin; 2267d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2268338d3105SPierre Jolivet /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 22693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2270d47f36abSHong Zhang } 2271dcd589f8SShri Abhyankar 
22729a2535b5SHong Zhang /* Set MUMPS options from the options database */ 227326cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2274dcd589f8SShri Abhyankar 22759566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 22769566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 227767877ebaSShri Abhyankar 227867877ebaSShri Abhyankar /* analysis phase */ 2279a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2280a5e57a09SHong Zhang mumps->id.n = M; 2281a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 228267877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2283a5e57a09SHong Zhang if (!mumps->myid) { 2284a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2285a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2286a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2287ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 228867877ebaSShri Abhyankar } 228967877ebaSShri Abhyankar break; 229067877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2291a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2292a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2293a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2294ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 229525aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 22969566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 22979566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 22989566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 229925aac85cSJunchao Zhang } 230067877ebaSShri Abhyankar break; 230167877ebaSShri Abhyankar } 23023ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 23039566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 230467877ebaSShri Abhyankar 2305450b117fSShri Abhyankar F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2306dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 230751d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2308b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2309d47f36abSHong Zhang 2310d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 23113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2312450b117fSShri Abhyankar } 2313b24902e0SBarry Smith 2314141f4205SHong Zhang /* Note the Petsc r permutation and factor info are ignored */ 2315d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2316d71ae5a4SJacob Faibussowitsch { 2317e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 231867877ebaSShri Abhyankar Vec b; 231967877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2320397b6df1SKris Buschelman 2321397b6df1SKris Buschelman PetscFunctionBegin; 2322d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2323338d3105SPierre Jolivet /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 23243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2325d47f36abSHong Zhang } 2326dcd589f8SShri Abhyankar 23279a2535b5SHong Zhang /* Set MUMPS options from the options database */ 232826cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, 
A)); 2329dcd589f8SShri Abhyankar 23309566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 23319566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2332dcd589f8SShri Abhyankar 233367877ebaSShri Abhyankar /* analysis phase */ 2334a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2335a5e57a09SHong Zhang mumps->id.n = M; 2336a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 233767877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2338a5e57a09SHong Zhang if (!mumps->myid) { 2339a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2340a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2341a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2342ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 234367877ebaSShri Abhyankar } 234467877ebaSShri Abhyankar break; 234567877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2346a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2347a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2348a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2349ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 235025aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 23519566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 23529566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 23539566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 235425aac85cSJunchao Zhang } 235567877ebaSShri Abhyankar break; 235667877ebaSShri Abhyankar } 23573ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 23589566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 23595cd7cf9dSHong Zhang 23602792810eSHong Zhang F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2361dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 236251d5961aSHong Zhang F->ops->solvetranspose = MatSolve_MUMPS; 23634e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 236423a5080aSHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2365b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 23664e34a73bSHong Zhang #if defined(PETSC_USE_COMPLEX) 23670298fd71SBarry Smith F->ops->getinertia = NULL; 23684e34a73bSHong Zhang #else 23694e34a73bSHong Zhang F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2370db4efbfdSBarry Smith #endif 2371d47f36abSHong Zhang 2372d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 23733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2374b24902e0SBarry Smith } 2375b24902e0SBarry Smith 2376d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2377d71ae5a4SJacob Faibussowitsch { 237864e6c443SBarry Smith PetscBool iascii; 237964e6c443SBarry Smith PetscViewerFormat format; 2380e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2381f6c57405SHong Zhang 2382f6c57405SHong Zhang PetscFunctionBegin; 238364e6c443SBarry Smith /* check if matrix is mumps type */ 23843ba16761SJacob Faibussowitsch if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 238564e6c443SBarry Smith 
23869566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 238764e6c443SBarry Smith if (iascii) { 23889566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 23891511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 23909566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 23911511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 23929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 23939566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 23949566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 23959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 23969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 23979566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 23989566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 23999566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 24009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 24019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 24029566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): 
%d\n", mumps->id.ICNTL(10))); 24039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2404a5e57a09SHong Zhang if (mumps->id.ICNTL(11) > 0) { 24059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 24069566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 24079566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 24089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 24099566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 24109566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2411f6c57405SHong Zhang } 24129566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 24139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 24149566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 241545e3843bSPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2416f6c57405SHong Zhang /* ICNTL(15-17) not used */ 24179566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 24189566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 24199566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 24209566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 24219566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 24229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2423c0165424SHong Zhang 24249566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 24259566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 24269566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 24279566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 24289566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 24299566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 243042179a6aSHong Zhang 24319566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 24329566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 24339566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 24349566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 24359566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 24369566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2437*146931dbSPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2438f6c57405SHong Zhang 24399566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 24409566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 24419566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 24429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 24439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 24449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2445f6c57405SHong Zhang 2446a5b23f4aSJose E. 
Roman /* information local to each processor */ 24479566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 24489566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 24499566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 24509566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 24519566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 24529566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 24539566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 24549566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 24559566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 24569566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2457f6c57405SHong Zhang 24589566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 24599566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 24609566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2461f6c57405SHong Zhang 24629566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 24639566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 24649566063dSJacob Faibussowitsch 
PetscCall(PetscViewerFlush(viewer)); 2465f6c57405SHong Zhang 24669566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 24679566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 24689566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2469b34f08ffSHong Zhang 2470a0e18203SThibaut Appel if (mumps->ninfo && mumps->ninfo <= 80) { 2471b34f08ffSHong Zhang PetscInt i; 2472b34f08ffSHong Zhang for (i = 0; i < mumps->ninfo; i++) { 24739566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 24749566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 24759566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2476b34f08ffSHong Zhang } 2477b34f08ffSHong Zhang } 24789566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 24791511cd71SPierre Jolivet } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2480f6c57405SHong Zhang 24811511cd71SPierre Jolivet if (mumps->myid == 0) { /* information from the host */ 24829566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 24839566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 24849566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 24859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2486f6c57405SHong Zhang 24879566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 24889566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 24899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 24909566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 24919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 24929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 24939566063dSJacob 
Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 24949566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 24959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 24969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 24979566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 24989566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 24999566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 25009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 25019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 25029566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 25039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS 
internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 25049566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 25059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 25069566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 25079566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 25089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 25099566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 25109566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 25119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 25129566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 25139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 
25149566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 25159566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 25169566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 25179566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 25189566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 25199566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 25209566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2521f6c57405SHong Zhang } 2522f6c57405SHong Zhang } 2523cb828f0fSHong Zhang } 25243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2525f6c57405SHong Zhang } 2526f6c57405SHong Zhang 2527d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2528d71ae5a4SJacob Faibussowitsch { 2529e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 253035bd34faSBarry Smith 253135bd34faSBarry Smith PetscFunctionBegin; 
253235bd34faSBarry Smith info->block_size = 1.0; 253364412097SPierre Jolivet info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 253464412097SPierre Jolivet info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 253535bd34faSBarry Smith info->nz_unneeded = 0.0; 253635bd34faSBarry Smith info->assemblies = 0.0; 253735bd34faSBarry Smith info->mallocs = 0.0; 253835bd34faSBarry Smith info->memory = 0.0; 253935bd34faSBarry Smith info->fill_ratio_given = 0; 254035bd34faSBarry Smith info->fill_ratio_needed = 0; 254135bd34faSBarry Smith info->factor_mallocs = 0; 25423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 254335bd34faSBarry Smith } 254435bd34faSBarry Smith 2545d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2546d71ae5a4SJacob Faibussowitsch { 2547e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2548a3d589ffSStefano Zampini const PetscScalar *arr; 25498e7ba810SStefano Zampini const PetscInt *idxs; 25508e7ba810SStefano Zampini PetscInt size, i; 25516444a565SStefano Zampini 25526444a565SStefano Zampini PetscFunctionBegin; 25539566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is, &size)); 2554b3cb21ddSStefano Zampini /* Schur complement matrix */ 25559566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F->schur)); 25569566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 25579566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2558a3d589ffSStefano Zampini mumps->id.schur = (MumpsScalar *)arr; 2559a3d589ffSStefano Zampini mumps->id.size_schur = size; 2560a3d589ffSStefano Zampini mumps->id.schur_lld = size; 25619566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 256248a46eb9SPierre Jolivet if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2563b3cb21ddSStefano Zampini 
2564b3cb21ddSStefano Zampini /* MUMPS expects Fortran style indices */ 25659566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.listvar_schur)); 25669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 25679566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is, &idxs)); 25689566063dSJacob Faibussowitsch for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 25699566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is, &idxs)); 257059ac8732SStefano Zampini /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2571b5fa320bSStefano Zampini mumps->id.ICNTL(26) = -1; 25723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25736444a565SStefano Zampini } 257459ac8732SStefano Zampini 2575d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2576d71ae5a4SJacob Faibussowitsch { 25776444a565SStefano Zampini Mat St; 2578e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 25796444a565SStefano Zampini PetscScalar *array; 25806444a565SStefano Zampini #if defined(PETSC_USE_COMPLEX) 25818ac429a0SStefano Zampini PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 25826444a565SStefano Zampini #endif 25836444a565SStefano Zampini 25846444a565SStefano Zampini PetscFunctionBegin; 258508401ef6SPierre Jolivet PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! 
You should call MatFactorSetSchurIS to enable it"); 25869566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 25879566063dSJacob Faibussowitsch PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 25889566063dSJacob Faibussowitsch PetscCall(MatSetType(St, MATDENSE)); 25899566063dSJacob Faibussowitsch PetscCall(MatSetUp(St)); 25909566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(St, &array)); 259159ac8732SStefano Zampini if (!mumps->sym) { /* MUMPS always return a full matrix */ 25926444a565SStefano Zampini if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 25936444a565SStefano Zampini PetscInt i, j, N = mumps->id.size_schur; 25946444a565SStefano Zampini for (i = 0; i < N; i++) { 25956444a565SStefano Zampini for (j = 0; j < N; j++) { 25966444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 25976444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j]; 25986444a565SStefano Zampini #else 25996444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 26006444a565SStefano Zampini #endif 26016444a565SStefano Zampini array[j * N + i] = val; 26026444a565SStefano Zampini } 26036444a565SStefano Zampini } 26046444a565SStefano Zampini } else { /* stored by columns */ 26059566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 26066444a565SStefano Zampini } 26076444a565SStefano Zampini } else { /* either full or lower-triangular (not packed) */ 26086444a565SStefano Zampini if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 26096444a565SStefano Zampini PetscInt i, j, N = mumps->id.size_schur; 26106444a565SStefano Zampini for (i = 0; i < N; i++) { 26116444a565SStefano Zampini for (j = i; j < N; j++) { 26126444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 26136444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j]; 
26146444a565SStefano Zampini #else 26156444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 26166444a565SStefano Zampini #endif 26176444a565SStefano Zampini array[i * N + j] = val; 26186444a565SStefano Zampini array[j * N + i] = val; 26196444a565SStefano Zampini } 26206444a565SStefano Zampini } 26216444a565SStefano Zampini } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 26229566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 26236444a565SStefano Zampini } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 26246444a565SStefano Zampini PetscInt i, j, N = mumps->id.size_schur; 26256444a565SStefano Zampini for (i = 0; i < N; i++) { 26266444a565SStefano Zampini for (j = 0; j < i + 1; j++) { 26276444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 26286444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j]; 26296444a565SStefano Zampini #else 26306444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 26316444a565SStefano Zampini #endif 26326444a565SStefano Zampini array[i * N + j] = val; 26336444a565SStefano Zampini array[j * N + i] = val; 26346444a565SStefano Zampini } 26356444a565SStefano Zampini } 26366444a565SStefano Zampini } 26376444a565SStefano Zampini } 26389566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(St, &array)); 26396444a565SStefano Zampini *S = St; 26403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 26416444a565SStefano Zampini } 26426444a565SStefano Zampini 2643d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2644d71ae5a4SJacob Faibussowitsch { 2645e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 26465ccb76cbSHong Zhang 26475ccb76cbSHong Zhang PetscFunctionBegin; 2648413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { /* need to cache 
icntl and ival since PetscMUMPS_c() has never been called */ 2649413bcc21SPierre Jolivet PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 26509371c9d4SSatish Balay for (i = 0; i < nICNTL_pre; ++i) 26519371c9d4SSatish Balay if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2652413bcc21SPierre Jolivet if (i == nICNTL_pre) { /* not already cached */ 2653413bcc21SPierre Jolivet if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2654413bcc21SPierre Jolivet else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2655413bcc21SPierre Jolivet mumps->ICNTL_pre[0]++; 2656413bcc21SPierre Jolivet } 2657413bcc21SPierre Jolivet mumps->ICNTL_pre[1 + 2 * i] = icntl; 2658413bcc21SPierre Jolivet PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2659413bcc21SPierre Jolivet } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 26603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 26615ccb76cbSHong Zhang } 26625ccb76cbSHong Zhang 2663d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2664d71ae5a4SJacob Faibussowitsch { 2665e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2666bc6112feSHong Zhang 2667bc6112feSHong Zhang PetscFunctionBegin; 266836df9881Sjeremy theler if (mumps->id.job == JOB_NULL) { 266936df9881Sjeremy theler PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 267036df9881Sjeremy theler *ival = 0; 267136df9881Sjeremy theler for (i = 0; i < nICNTL_pre; ++i) { 267236df9881Sjeremy theler if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 267336df9881Sjeremy theler } 267436df9881Sjeremy theler } else *ival = mumps->id.ICNTL(icntl); 26753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2676bc6112feSHong Zhang } 2677bc6112feSHong Zhang 26785ccb76cbSHong Zhang /*@ 26795ccb76cbSHong Zhang MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 26805ccb76cbSHong Zhang 2681c3339decSBarry Smith Logically Collective 26825ccb76cbSHong Zhang 26835ccb76cbSHong Zhang Input Parameters: 268411a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 26855ccb76cbSHong Zhang . icntl - index of MUMPS parameter array ICNTL() 26865ccb76cbSHong Zhang - ival - value of MUMPS ICNTL(icntl) 26875ccb76cbSHong Zhang 26883c7db156SBarry Smith Options Database Key: 2689147403d9SBarry Smith . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 26905ccb76cbSHong Zhang 26915ccb76cbSHong Zhang Level: beginner 26925ccb76cbSHong Zhang 269396a0c994SBarry Smith References: 2694606c0280SSatish Balay . 
* - MUMPS Users' Guide 26955ccb76cbSHong Zhang 26961cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 26975ccb76cbSHong Zhang @*/ 2698d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2699d71ae5a4SJacob Faibussowitsch { 27005ccb76cbSHong Zhang PetscFunctionBegin; 27012989dfd4SHong Zhang PetscValidType(F, 1); 270228b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 27035ccb76cbSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 27045ccb76cbSHong Zhang PetscValidLogicalCollectiveInt(F, ival, 3); 2705*146931dbSPierre Jolivet PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2706cac4c232SBarry Smith PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 27073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27085ccb76cbSHong Zhang } 27095ccb76cbSHong Zhang 2710a21f80fcSHong Zhang /*@ 2711a21f80fcSHong Zhang MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2712a21f80fcSHong Zhang 2713c3339decSBarry Smith Logically Collective 2714a21f80fcSHong Zhang 2715a21f80fcSHong Zhang Input Parameters: 271611a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2717a21f80fcSHong Zhang - icntl - index of MUMPS parameter array ICNTL() 2718a21f80fcSHong Zhang 2719a21f80fcSHong Zhang Output Parameter: 2720a21f80fcSHong Zhang . ival - value of MUMPS ICNTL(icntl) 2721a21f80fcSHong Zhang 2722a21f80fcSHong Zhang Level: beginner 2723a21f80fcSHong Zhang 272496a0c994SBarry Smith References: 2725606c0280SSatish Balay . 
* - MUMPS Users' Guide 2726a21f80fcSHong Zhang 27271cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2728a21f80fcSHong Zhang @*/ 2729d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2730d71ae5a4SJacob Faibussowitsch { 2731bc6112feSHong Zhang PetscFunctionBegin; 27322989dfd4SHong Zhang PetscValidType(F, 1); 273328b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2734bc6112feSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 2735bc6112feSHong Zhang PetscValidIntPointer(ival, 3); 2736*146931dbSPierre Jolivet PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2737cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 27383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2739bc6112feSHong Zhang } 2740bc6112feSHong Zhang 2741d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2742d71ae5a4SJacob Faibussowitsch { 2743e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 27448928b65cSHong Zhang 27458928b65cSHong Zhang PetscFunctionBegin; 2746413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { 2747413bcc21SPierre Jolivet PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 27489371c9d4SSatish Balay for (i = 0; i < nCNTL_pre; ++i) 27499371c9d4SSatish Balay if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2750413bcc21SPierre Jolivet if (i == nCNTL_pre) { 2751413bcc21SPierre Jolivet if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2752413bcc21SPierre Jolivet else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2753413bcc21SPierre Jolivet mumps->CNTL_pre[0]++; 2754413bcc21SPierre Jolivet } 2755413bcc21SPierre Jolivet mumps->CNTL_pre[1 + 2 * i] = icntl; 2756413bcc21SPierre Jolivet mumps->CNTL_pre[2 + 2 * i] = val; 2757413bcc21SPierre Jolivet } else mumps->id.CNTL(icntl) = val; 27583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27598928b65cSHong Zhang } 27608928b65cSHong Zhang 2761d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2762d71ae5a4SJacob Faibussowitsch { 2763e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2764bc6112feSHong Zhang 2765bc6112feSHong Zhang PetscFunctionBegin; 276636df9881Sjeremy theler if (mumps->id.job == JOB_NULL) { 276736df9881Sjeremy theler PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 276836df9881Sjeremy theler *val = 0.0; 276936df9881Sjeremy theler for (i = 0; i < nCNTL_pre; ++i) { 277036df9881Sjeremy theler if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 277136df9881Sjeremy theler } 277236df9881Sjeremy theler } else *val = mumps->id.CNTL(icntl); 27733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2774bc6112feSHong Zhang } 2775bc6112feSHong Zhang 27768928b65cSHong Zhang /*@ 27778928b65cSHong Zhang MatMumpsSetCntl - Set MUMPS parameter CNTL() 27788928b65cSHong Zhang 2779c3339decSBarry Smith Logically Collective 27808928b65cSHong Zhang 27818928b65cSHong Zhang Input Parameters: 278211a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 27838928b65cSHong Zhang . icntl - index of MUMPS parameter array CNTL() 27848928b65cSHong Zhang - val - value of MUMPS CNTL(icntl) 27858928b65cSHong Zhang 27863c7db156SBarry Smith Options Database Key: 2787147403d9SBarry Smith . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 27888928b65cSHong Zhang 27898928b65cSHong Zhang Level: beginner 27908928b65cSHong Zhang 279196a0c994SBarry Smith References: 2792606c0280SSatish Balay . 
* - MUMPS Users' Guide 27938928b65cSHong Zhang 27941cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 27958928b65cSHong Zhang @*/ 2796d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2797d71ae5a4SJacob Faibussowitsch { 27988928b65cSHong Zhang PetscFunctionBegin; 27992989dfd4SHong Zhang PetscValidType(F, 1); 280028b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 28018928b65cSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 2802bc6112feSHong Zhang PetscValidLogicalCollectiveReal(F, val, 3); 2803413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2804cac4c232SBarry Smith PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 28053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28068928b65cSHong Zhang } 28078928b65cSHong Zhang 2808a21f80fcSHong Zhang /*@ 2809a21f80fcSHong Zhang MatMumpsGetCntl - Get MUMPS parameter CNTL() 2810a21f80fcSHong Zhang 2811c3339decSBarry Smith Logically Collective 2812a21f80fcSHong Zhang 2813a21f80fcSHong Zhang Input Parameters: 281411a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2815a21f80fcSHong Zhang - icntl - index of MUMPS parameter array CNTL() 2816a21f80fcSHong Zhang 2817a21f80fcSHong Zhang Output Parameter: 2818a21f80fcSHong Zhang . val - value of MUMPS CNTL(icntl) 2819a21f80fcSHong Zhang 2820a21f80fcSHong Zhang Level: beginner 2821a21f80fcSHong Zhang 282296a0c994SBarry Smith References: 2823606c0280SSatish Balay . 
* - MUMPS Users' Guide 2824a21f80fcSHong Zhang 28251cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2826a21f80fcSHong Zhang @*/ 2827d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2828d71ae5a4SJacob Faibussowitsch { 2829bc6112feSHong Zhang PetscFunctionBegin; 28302989dfd4SHong Zhang PetscValidType(F, 1); 283128b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2832bc6112feSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 2833bc6112feSHong Zhang PetscValidRealPointer(val, 3); 2834413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2835cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 28363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2837bc6112feSHong Zhang } 2838bc6112feSHong Zhang 2839d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2840d71ae5a4SJacob Faibussowitsch { 2841e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2842bc6112feSHong Zhang 2843bc6112feSHong Zhang PetscFunctionBegin; 2844bc6112feSHong Zhang *info = mumps->id.INFO(icntl); 28453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2846bc6112feSHong Zhang } 2847bc6112feSHong Zhang 2848d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2849d71ae5a4SJacob Faibussowitsch { 2850e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2851bc6112feSHong Zhang 2852bc6112feSHong Zhang PetscFunctionBegin; 2853bc6112feSHong Zhang *infog = mumps->id.INFOG(icntl); 
28543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2855bc6112feSHong Zhang } 2856bc6112feSHong Zhang 2857d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2858d71ae5a4SJacob Faibussowitsch { 2859e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2860bc6112feSHong Zhang 2861bc6112feSHong Zhang PetscFunctionBegin; 2862bc6112feSHong Zhang *rinfo = mumps->id.RINFO(icntl); 28633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2864bc6112feSHong Zhang } 2865bc6112feSHong Zhang 2866d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2867d71ae5a4SJacob Faibussowitsch { 2868e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2869bc6112feSHong Zhang 2870bc6112feSHong Zhang PetscFunctionBegin; 2871bc6112feSHong Zhang *rinfog = mumps->id.RINFOG(icntl); 28723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2873bc6112feSHong Zhang } 2874bc6112feSHong Zhang 28755c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 28765c0bae8cSAshish Patel { 28775c0bae8cSAshish Patel Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 28785c0bae8cSAshish Patel 28795c0bae8cSAshish Patel PetscFunctionBegin; 28805c0bae8cSAshish Patel PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 28815c0bae8cSAshish Patel *size = 0; 28825c0bae8cSAshish Patel *array = NULL; 28835c0bae8cSAshish Patel if (!mumps->myid) { 28845c0bae8cSAshish Patel *size = mumps->id.INFOG(28); 28855c0bae8cSAshish Patel PetscCall(PetscMalloc1(*size, array)); 28865c0bae8cSAshish Patel for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 28875c0bae8cSAshish Patel } 28885c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 28895c0bae8cSAshish Patel } 28905c0bae8cSAshish Patel 
2891d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2892d71ae5a4SJacob Faibussowitsch { 28930e6b8875SHong Zhang Mat Bt = NULL, Btseq = NULL; 28940e6b8875SHong Zhang PetscBool flg; 2895bb599dfdSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2896bb599dfdSHong Zhang PetscScalar *aa; 2897f410b75aSHong Zhang PetscInt spnr, *ia, *ja, M, nrhs; 2898bb599dfdSHong Zhang 2899bb599dfdSHong Zhang PetscFunctionBegin; 2900064a246eSJacob Faibussowitsch PetscValidPointer(spRHS, 2); 2901013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 29020e6b8875SHong Zhang if (flg) { 29039566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2904013e2dc7SBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2905bb599dfdSHong Zhang 29069566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2907bb599dfdSHong Zhang 29082d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 29090e6b8875SHong Zhang Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 29100e6b8875SHong Zhang Btseq = b->A; 29110e6b8875SHong Zhang } else { 29120e6b8875SHong Zhang Btseq = Bt; 29130e6b8875SHong Zhang } 29140e6b8875SHong Zhang 29159566063dSJacob Faibussowitsch PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2916f410b75aSHong Zhang mumps->id.nrhs = nrhs; 2917f410b75aSHong Zhang mumps->id.lrhs = M; 2918f410b75aSHong Zhang mumps->id.rhs = NULL; 2919f410b75aSHong Zhang 2920e3f2db6aSHong Zhang if (!mumps->myid) { 29219566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 29229566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 292328b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 29249566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, 
&mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2925bb599dfdSHong Zhang mumps->id.rhs_sparse = (MumpsScalar *)aa; 2926e3f2db6aSHong Zhang } else { 2927e3f2db6aSHong Zhang mumps->id.irhs_ptr = NULL; 2928e3f2db6aSHong Zhang mumps->id.irhs_sparse = NULL; 2929e3f2db6aSHong Zhang mumps->id.nz_rhs = 0; 2930e3f2db6aSHong Zhang mumps->id.rhs_sparse = NULL; 2931e3f2db6aSHong Zhang } 2932bb599dfdSHong Zhang mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2933e3f2db6aSHong Zhang mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2934bb599dfdSHong Zhang 2935bb599dfdSHong Zhang /* solve phase */ 2936bb599dfdSHong Zhang mumps->id.job = JOB_SOLVE; 29373ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 2938049d1499SBarry Smith PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 293914267174SHong Zhang 2940e3f2db6aSHong Zhang if (!mumps->myid) { 29419566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 29429566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 294328b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2944e3f2db6aSHong Zhang } 29453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2946bb599dfdSHong Zhang } 2947bb599dfdSHong Zhang 2948bb599dfdSHong Zhang /*@ 29492ef1f0ffSBarry Smith MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2950bb599dfdSHong Zhang 2951c3339decSBarry Smith Logically Collective 2952bb599dfdSHong Zhang 295320f4b53cSBarry Smith Input Parameter: 295420f4b53cSBarry Smith . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2955bb599dfdSHong Zhang 2956bb599dfdSHong Zhang Output Parameter: 295720f4b53cSBarry Smith . 
spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2958bb599dfdSHong Zhang 2959bb599dfdSHong Zhang Level: beginner 2960bb599dfdSHong Zhang 2961bb599dfdSHong Zhang References: 2962606c0280SSatish Balay . * - MUMPS Users' Guide 2963bb599dfdSHong Zhang 29641cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2965bb599dfdSHong Zhang @*/ 2966d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2967d71ae5a4SJacob Faibussowitsch { 2968bb599dfdSHong Zhang PetscFunctionBegin; 2969bb599dfdSHong Zhang PetscValidType(F, 1); 297028b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2971cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 29723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2973bb599dfdSHong Zhang } 2974bb599dfdSHong Zhang 2975d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2976d71ae5a4SJacob Faibussowitsch { 29770e6b8875SHong Zhang Mat spRHS; 29780e6b8875SHong Zhang 29790e6b8875SHong Zhang PetscFunctionBegin; 29809566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(spRHST, &spRHS)); 29819566063dSJacob Faibussowitsch PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 29829566063dSJacob Faibussowitsch PetscCall(MatDestroy(&spRHS)); 29833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 29840e6b8875SHong Zhang } 29850e6b8875SHong Zhang 29860e6b8875SHong Zhang /*@ 29872ef1f0ffSBarry Smith MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 29880e6b8875SHong Zhang 2989c3339decSBarry Smith Logically Collective 29900e6b8875SHong Zhang 299120f4b53cSBarry Smith Input Parameter: 299220f4b53cSBarry Smith . 
F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 29930e6b8875SHong Zhang 29940e6b8875SHong Zhang Output Parameter: 299520f4b53cSBarry Smith . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 29960e6b8875SHong Zhang 29970e6b8875SHong Zhang Level: beginner 29980e6b8875SHong Zhang 29990e6b8875SHong Zhang References: 3000606c0280SSatish Balay . * - MUMPS Users' Guide 30010e6b8875SHong Zhang 30021cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 30030e6b8875SHong Zhang @*/ 3004d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3005d71ae5a4SJacob Faibussowitsch { 30060e6b8875SHong Zhang PetscBool flg; 30070e6b8875SHong Zhang 30080e6b8875SHong Zhang PetscFunctionBegin; 30090e6b8875SHong Zhang PetscValidType(F, 1); 301028b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 30119566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 301228b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 30130e6b8875SHong Zhang 3014cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 30153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30160e6b8875SHong Zhang } 30170e6b8875SHong Zhang 3018a21f80fcSHong Zhang /*@ 3019a21f80fcSHong Zhang MatMumpsGetInfo - Get MUMPS parameter INFO() 3020a21f80fcSHong Zhang 3021c3339decSBarry Smith Logically Collective 3022a21f80fcSHong Zhang 3023a21f80fcSHong Zhang Input Parameters: 302411a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3025a21f80fcSHong Zhang - icntl - index of MUMPS 
parameter array INFO() 3026a21f80fcSHong Zhang 3027a21f80fcSHong Zhang Output Parameter: 3028a21f80fcSHong Zhang . ival - value of MUMPS INFO(icntl) 3029a21f80fcSHong Zhang 3030a21f80fcSHong Zhang Level: beginner 3031a21f80fcSHong Zhang 303296a0c994SBarry Smith References: 3033606c0280SSatish Balay . * - MUMPS Users' Guide 3034a21f80fcSHong Zhang 30351cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3036a21f80fcSHong Zhang @*/ 3037d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3038d71ae5a4SJacob Faibussowitsch { 3039bc6112feSHong Zhang PetscFunctionBegin; 30402989dfd4SHong Zhang PetscValidType(F, 1); 304128b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3042ca810319SHong Zhang PetscValidIntPointer(ival, 3); 3043cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 30443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3045bc6112feSHong Zhang } 3046bc6112feSHong Zhang 3047a21f80fcSHong Zhang /*@ 3048a21f80fcSHong Zhang MatMumpsGetInfog - Get MUMPS parameter INFOG() 3049a21f80fcSHong Zhang 3050c3339decSBarry Smith Logically Collective 3051a21f80fcSHong Zhang 3052a21f80fcSHong Zhang Input Parameters: 305311a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3054a21f80fcSHong Zhang - icntl - index of MUMPS parameter array INFOG() 3055a21f80fcSHong Zhang 3056a21f80fcSHong Zhang Output Parameter: 3057a21f80fcSHong Zhang . ival - value of MUMPS INFOG(icntl) 3058a21f80fcSHong Zhang 3059a21f80fcSHong Zhang Level: beginner 3060a21f80fcSHong Zhang 306196a0c994SBarry Smith References: 3062606c0280SSatish Balay . 
* - MUMPS Users' Guide 3063a21f80fcSHong Zhang 30641cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3065a21f80fcSHong Zhang @*/ 3066d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3067d71ae5a4SJacob Faibussowitsch { 3068bc6112feSHong Zhang PetscFunctionBegin; 30692989dfd4SHong Zhang PetscValidType(F, 1); 307028b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3071ca810319SHong Zhang PetscValidIntPointer(ival, 3); 3072cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 30733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3074bc6112feSHong Zhang } 3075bc6112feSHong Zhang 3076a21f80fcSHong Zhang /*@ 3077a21f80fcSHong Zhang MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3078a21f80fcSHong Zhang 3079c3339decSBarry Smith Logically Collective 3080a21f80fcSHong Zhang 3081a21f80fcSHong Zhang Input Parameters: 308211a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3083a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFO() 3084a21f80fcSHong Zhang 3085a21f80fcSHong Zhang Output Parameter: 3086a21f80fcSHong Zhang . val - value of MUMPS RINFO(icntl) 3087a21f80fcSHong Zhang 3088a21f80fcSHong Zhang Level: beginner 3089a21f80fcSHong Zhang 309096a0c994SBarry Smith References: 3091606c0280SSatish Balay . 
* - MUMPS Users' Guide 3092a21f80fcSHong Zhang 30931cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3094a21f80fcSHong Zhang @*/ 3095d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3096d71ae5a4SJacob Faibussowitsch { 3097bc6112feSHong Zhang PetscFunctionBegin; 30982989dfd4SHong Zhang PetscValidType(F, 1); 309928b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3100bc6112feSHong Zhang PetscValidRealPointer(val, 3); 3101cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 31023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3103bc6112feSHong Zhang } 3104bc6112feSHong Zhang 3105a21f80fcSHong Zhang /*@ 3106a21f80fcSHong Zhang MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3107a21f80fcSHong Zhang 3108c3339decSBarry Smith Logically Collective 3109a21f80fcSHong Zhang 3110a21f80fcSHong Zhang Input Parameters: 311111a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3112a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFOG() 3113a21f80fcSHong Zhang 3114a21f80fcSHong Zhang Output Parameter: 3115a21f80fcSHong Zhang . val - value of MUMPS RINFOG(icntl) 3116a21f80fcSHong Zhang 3117a21f80fcSHong Zhang Level: beginner 3118a21f80fcSHong Zhang 311996a0c994SBarry Smith References: 3120606c0280SSatish Balay . 
* - MUMPS Users' Guide 3121a21f80fcSHong Zhang 31221cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3123a21f80fcSHong Zhang @*/ 3124d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3125d71ae5a4SJacob Faibussowitsch { 3126bc6112feSHong Zhang PetscFunctionBegin; 31272989dfd4SHong Zhang PetscValidType(F, 1); 312828b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3129bc6112feSHong Zhang PetscValidRealPointer(val, 3); 3130cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 31313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3132bc6112feSHong Zhang } 3133bc6112feSHong Zhang 31345c0bae8cSAshish Patel /*@ 31355c0bae8cSAshish Patel MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 31365c0bae8cSAshish Patel 31375c0bae8cSAshish Patel Logically Collective 31385c0bae8cSAshish Patel 31395c0bae8cSAshish Patel Input Parameter: 31405c0bae8cSAshish Patel . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 31415c0bae8cSAshish Patel 31425c0bae8cSAshish Patel Output Parameters: 31435c0bae8cSAshish Patel + size - local size of the array. The size of the array is non-zero only on the host. 31445c0bae8cSAshish Patel - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 31455c0bae8cSAshish Patel for freeing this array. 31465c0bae8cSAshish Patel 31475c0bae8cSAshish Patel Level: beginner 31485c0bae8cSAshish Patel 31495c0bae8cSAshish Patel References: 31505c0bae8cSAshish Patel . 
* - MUMPS Users' Guide 31515c0bae8cSAshish Patel 31521cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 31535c0bae8cSAshish Patel @*/ 31545c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 31555c0bae8cSAshish Patel { 31565c0bae8cSAshish Patel PetscFunctionBegin; 31575c0bae8cSAshish Patel PetscValidType(F, 1); 31585c0bae8cSAshish Patel PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 31590d6f747bSJacob Faibussowitsch PetscValidIntPointer(size, 2); 31600d6f747bSJacob Faibussowitsch PetscValidPointer(array, 3); 31615c0bae8cSAshish Patel PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 31625c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 31635c0bae8cSAshish Patel } 31645c0bae8cSAshish Patel 316524b6179bSKris Buschelman /*MC 31662692d6eeSBarry Smith MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 316724b6179bSKris Buschelman distributed and sequential matrices via the external package MUMPS. 316824b6179bSKris Buschelman 316911a5261eSBarry Smith Works with `MATAIJ` and `MATSBAIJ` matrices 317024b6179bSKris Buschelman 3171c2b89b5dSBarry Smith Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3172c2b89b5dSBarry Smith 31732ef1f0ffSBarry Smith Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 31742ef1f0ffSBarry Smith See details below. 
3175217d3b1eSJunchao Zhang 31762ef1f0ffSBarry Smith Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3177c2b89b5dSBarry Smith 317824b6179bSKris Buschelman Options Database Keys: 31794422a9fcSPatrick Sanan + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 31804422a9fcSPatrick Sanan . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 31814422a9fcSPatrick Sanan . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 31824422a9fcSPatrick Sanan . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 31834422a9fcSPatrick Sanan . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3184b53c1a7fSBarry Smith . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3185b53c1a7fSBarry Smith Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 31864422a9fcSPatrick Sanan . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 31874422a9fcSPatrick Sanan . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 31884422a9fcSPatrick Sanan . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 31894422a9fcSPatrick Sanan . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 31904422a9fcSPatrick Sanan . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 31914422a9fcSPatrick Sanan . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 319245e3843bSPierre Jolivet . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 31934422a9fcSPatrick Sanan . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 319425aac85cSJunchao Zhang . 
-mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
.  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
.  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
.  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
.  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
.  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
.  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
.  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
.  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
.  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
.  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
.  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
.  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
.  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
.
-mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 32124422a9fcSPatrick Sanan . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3213217d3b1eSJunchao Zhang . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3214a0e18203SThibaut Appel . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3215217d3b1eSJunchao Zhang - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3216217d3b1eSJunchao Zhang Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 321724b6179bSKris Buschelman 321824b6179bSKris Buschelman Level: beginner 321924b6179bSKris Buschelman 322095452b02SPatrick Sanan Notes: 32212ef1f0ffSBarry Smith MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 32222ef1f0ffSBarry Smith error if the matrix is Hermitian. 322338548759SBarry Smith 322426cc229bSBarry Smith When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 322526cc229bSBarry Smith `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 322626cc229bSBarry Smith 32272ef1f0ffSBarry Smith When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 32282ef1f0ffSBarry Smith the failure with 32292ef1f0ffSBarry Smith .vb 32302ef1f0ffSBarry Smith KSPGetPC(ksp,&pc); 32312ef1f0ffSBarry Smith PCFactorGetMatrix(pc,&mat); 32322ef1f0ffSBarry Smith MatMumpsGetInfo(mat,....); 32332ef1f0ffSBarry Smith MatMumpsGetInfog(mat,....); etc. 32342ef1f0ffSBarry Smith .ve 32352ef1f0ffSBarry Smith Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 
32369fc87aa7SBarry Smith 3237a5399872SJunchao Zhang MUMPS provides 64-bit integer support in two build modes: 3238a5399872SJunchao Zhang full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3239a5399872SJunchao Zhang requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 32408fcaa860SBarry Smith 3241a5399872SJunchao Zhang selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3242a5399872SJunchao Zhang MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3243a5399872SJunchao Zhang columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3244a5399872SJunchao Zhang integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3245a5399872SJunchao Zhang 3246a5399872SJunchao Zhang With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3247a5399872SJunchao Zhang 3248a5399872SJunchao Zhang Two modes to run MUMPS/PETSc with OpenMP 32492ef1f0ffSBarry Smith .vb 32502ef1f0ffSBarry Smith Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 32512ef1f0ffSBarry Smith threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 32522ef1f0ffSBarry Smith .ve 32538fcaa860SBarry Smith 32542ef1f0ffSBarry Smith .vb 32552ef1f0ffSBarry Smith -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. 
For example,
  if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
.ve

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
  (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
  (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
  libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
  (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

  If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
  processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
  size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
  are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We will free CPUs assigned to slaves (might be set
  by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
  In a multi-socket compute node, MPI rank mapping is an issue.
Still use the above example and suppose your compute node has two sockets, 3271217d3b1eSJunchao Zhang if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3272217d3b1eSJunchao Zhang MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3273217d3b1eSJunchao Zhang cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3274217d3b1eSJunchao Zhang problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 32758fcaa860SBarry Smith For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3276217d3b1eSJunchao Zhang examine the mapping result. 3277217d3b1eSJunchao Zhang 327811a5261eSBarry Smith PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 327911a5261eSBarry Smith for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 328011a5261eSBarry Smith calls `omp_set_num_threads`(m) internally before calling MUMPS. 3281217d3b1eSJunchao Zhang 3282217d3b1eSJunchao Zhang References: 3283606c0280SSatish Balay + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3284606c0280SSatish Balay - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 
3285217d3b1eSJunchao Zhang 32861cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 328724b6179bSKris Buschelman M*/ 328824b6179bSKris Buschelman 3289d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3290d71ae5a4SJacob Faibussowitsch { 329135bd34faSBarry Smith PetscFunctionBegin; 32922692d6eeSBarry Smith *type = MATSOLVERMUMPS; 32933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 329435bd34faSBarry Smith } 329535bd34faSBarry Smith 3296bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI AIJ matrices */ 3297d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3298d71ae5a4SJacob Faibussowitsch { 32992877fffaSHong Zhang Mat B; 33002877fffaSHong Zhang Mat_MUMPS *mumps; 3301ace3abfcSBarry Smith PetscBool isSeqAIJ; 33022c7c0729SBarry Smith PetscMPIInt size; 33032877fffaSHong Zhang 33042877fffaSHong Zhang PetscFunctionBegin; 3305eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 330603e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 330703e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 330803e5aca4SStefano Zampini *F = NULL; 330903e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 331003e5aca4SStefano Zampini } 3311eb1ec7c1SStefano Zampini #endif 33122877fffaSHong Zhang /* Create the factorization matrix */ 33139566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 33149566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 33159566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 33169566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 33179566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 33182877fffaSHong Zhang 33194dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 33202205254eSKarl Rupp 33212877fffaSHong Zhang B->ops->view = MatView_MUMPS; 332235bd34faSBarry Smith B->ops->getinfo = MatGetInfo_MUMPS; 33232205254eSKarl Rupp 33249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 33259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 33269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 33279566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 33289566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 33299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 33309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 33319566063dSJacob 
Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 33329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 33339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 33349566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 33355c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 33369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 33379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 33386444a565SStefano Zampini 3339450b117fSShri Abhyankar if (ftype == MAT_FACTOR_LU) { 3340450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3341d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 3342bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3343bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 33449566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3345746480a1SHong Zhang mumps->sym = 0; 3346dcd589f8SShri Abhyankar } else { 334767877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3348450b117fSShri Abhyankar B->factortype = MAT_FACTOR_CHOLESKY; 3349bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3350bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 33519566063dSJacob Faibussowitsch 
PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 335259ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 335359ac8732SStefano Zampini mumps->sym = 2; 335459ac8732SStefano Zampini #else 3355b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 33566fdc2a6dSBarry Smith else mumps->sym = 2; 335759ac8732SStefano Zampini #endif 3358450b117fSShri Abhyankar } 33592877fffaSHong Zhang 336000c67f3bSHong Zhang /* set solvertype */ 33619566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 33629566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 33639566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 33642c7c0729SBarry Smith if (size == 1) { 33654ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3366f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 33672c7c0729SBarry Smith } 33682877fffaSHong Zhang B->ops->destroy = MatDestroy_MUMPS; 3369e69c285eSBarry Smith B->data = (void *)mumps; 33702205254eSKarl Rupp 33712877fffaSHong Zhang *F = B; 3372413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3373413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3374413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3375d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 33763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 33772877fffaSHong Zhang } 33782877fffaSHong Zhang 3379bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3380d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3381d71ae5a4SJacob Faibussowitsch { 33822877fffaSHong Zhang Mat B; 33832877fffaSHong Zhang Mat_MUMPS *mumps; 3384ace3abfcSBarry Smith PetscBool isSeqSBAIJ; 33852c7c0729SBarry Smith PetscMPIInt size; 33862877fffaSHong Zhang 33872877fffaSHong Zhang 
PetscFunctionBegin; 3388eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 338903e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 339003e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 339103e5aca4SStefano Zampini *F = NULL; 339203e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 339303e5aca4SStefano Zampini } 3394eb1ec7c1SStefano Zampini #endif 33959566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 33969566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 33979566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 33989566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3399e69c285eSBarry Smith 34004dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 34019566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3402bccb9932SShri Abhyankar if (isSeqSBAIJ) { 340316ebf90aSShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3404dcd589f8SShri Abhyankar } else { 3405bccb9932SShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3406bccb9932SShri Abhyankar } 3407bccb9932SShri Abhyankar 340867877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3409bccb9932SShri Abhyankar B->ops->view = MatView_MUMPS; 3410722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 34112205254eSKarl Rupp 34129566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 34139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 34149566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 34159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 34169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 34179566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 34189566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 34199566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 34209566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 34219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 34229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 34235c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 34249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 34259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 34262205254eSKarl Rupp 3427f4762488SHong Zhang B->factortype = MAT_FACTOR_CHOLESKY; 342859ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 342959ac8732SStefano Zampini mumps->sym = 2; 343059ac8732SStefano Zampini #else 3431b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 34326fdc2a6dSBarry Smith else mumps->sym = 2; 
343359ac8732SStefano Zampini #endif 3434a214ac2aSShri Abhyankar 343500c67f3bSHong Zhang /* set solvertype */ 34369566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 34379566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 34389566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 34392c7c0729SBarry Smith if (size == 1) { 34404ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3441f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 34422c7c0729SBarry Smith } 34439566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3444f3c0ef26SHong Zhang B->ops->destroy = MatDestroy_MUMPS; 3445e69c285eSBarry Smith B->data = (void *)mumps; 34462205254eSKarl Rupp 34472877fffaSHong Zhang *F = B; 3448413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3449413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3450413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3451d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 34523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34532877fffaSHong Zhang } 345497969023SHong Zhang 3455d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3456d71ae5a4SJacob Faibussowitsch { 345767877ebaSShri Abhyankar Mat B; 345867877ebaSShri Abhyankar Mat_MUMPS *mumps; 3459ace3abfcSBarry Smith PetscBool isSeqBAIJ; 34602c7c0729SBarry Smith PetscMPIInt size; 346167877ebaSShri Abhyankar 346267877ebaSShri Abhyankar PetscFunctionBegin; 346367877ebaSShri Abhyankar /* Create the factorization matrix */ 34649566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 34659566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 34669566063dSJacob Faibussowitsch 
PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 34679566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 34689566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3469450b117fSShri Abhyankar 34704dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 3471450b117fSShri Abhyankar if (ftype == MAT_FACTOR_LU) { 3472450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3473450b117fSShri Abhyankar B->factortype = MAT_FACTOR_LU; 3474bccb9932SShri Abhyankar if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3475bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3476746480a1SHong Zhang mumps->sym = 0; 34779566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3478546078acSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3479bccb9932SShri Abhyankar 3480450b117fSShri Abhyankar B->ops->view = MatView_MUMPS; 3481722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 34822205254eSKarl Rupp 34839566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 34849566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 34859566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 34869566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 34879566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 34889566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 34899566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 34909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 34919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 34929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 34939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 34945c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 34959566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 34969566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3497450b117fSShri Abhyankar 349800c67f3bSHong Zhang /* set solvertype */ 34999566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 35009566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 35019566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 35022c7c0729SBarry Smith if (size == 1) { 35034ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3504f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 35052c7c0729SBarry Smith } 35067ee00b23SStefano Zampini B->ops->destroy = MatDestroy_MUMPS; 35077ee00b23SStefano Zampini B->data = (void *)mumps; 35087ee00b23SStefano Zampini 35097ee00b23SStefano Zampini *F = B; 
3510413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3511413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3512413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3513d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 35143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 35157ee00b23SStefano Zampini } 35167ee00b23SStefano Zampini 35177ee00b23SStefano Zampini /* MatGetFactor for Seq and MPI SELL matrices */ 3518d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3519d71ae5a4SJacob Faibussowitsch { 35207ee00b23SStefano Zampini Mat B; 35217ee00b23SStefano Zampini Mat_MUMPS *mumps; 35227ee00b23SStefano Zampini PetscBool isSeqSELL; 35232c7c0729SBarry Smith PetscMPIInt size; 35247ee00b23SStefano Zampini 35257ee00b23SStefano Zampini PetscFunctionBegin; 35267ee00b23SStefano Zampini /* Create the factorization matrix */ 35279566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 35289566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 35299566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 35309566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 35319566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 35327ee00b23SStefano Zampini 35334dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 35347ee00b23SStefano Zampini 35357ee00b23SStefano Zampini B->ops->view = MatView_MUMPS; 35367ee00b23SStefano Zampini B->ops->getinfo = MatGetInfo_MUMPS; 35377ee00b23SStefano Zampini 35389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 35399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 35409566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 35419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 35429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 35439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 35449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 35459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 35469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 35479566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 35489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 35495c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 35507ee00b23SStefano Zampini 35517ee00b23SStefano Zampini if (ftype == MAT_FACTOR_LU) { 35527ee00b23SStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 35537ee00b23SStefano Zampini B->factortype = MAT_FACTOR_LU; 35547ee00b23SStefano Zampini if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 35557ee00b23SStefano Zampini else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 35567ee00b23SStefano Zampini mumps->sym = 0; 35579566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 35587ee00b23SStefano Zampini } 
else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 35597ee00b23SStefano Zampini 35607ee00b23SStefano Zampini /* set solvertype */ 35619566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 35629566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 35639566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 35642c7c0729SBarry Smith if (size == 1) { 35654ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3566f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 35672c7c0729SBarry Smith } 3568450b117fSShri Abhyankar B->ops->destroy = MatDestroy_MUMPS; 3569e69c285eSBarry Smith B->data = (void *)mumps; 35702205254eSKarl Rupp 3571450b117fSShri Abhyankar *F = B; 3572413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3573413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3574413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3575d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 35763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3577450b117fSShri Abhyankar } 357842c9c57cSBarry Smith 35799d0448ceSStefano Zampini /* MatGetFactor for MATNEST matrices */ 35809d0448ceSStefano Zampini static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 35819d0448ceSStefano Zampini { 35829d0448ceSStefano Zampini Mat B, **mats; 35839d0448ceSStefano Zampini Mat_MUMPS *mumps; 35849d0448ceSStefano Zampini PetscInt nr, nc; 35859d0448ceSStefano Zampini PetscMPIInt size; 358603e5aca4SStefano Zampini PetscBool flg = PETSC_TRUE; 35879d0448ceSStefano Zampini 35889d0448ceSStefano Zampini PetscFunctionBegin; 35899d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX) 359003e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 359103e5aca4SStefano Zampini 
PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 359203e5aca4SStefano Zampini *F = NULL; 359303e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 359403e5aca4SStefano Zampini } 35959d0448ceSStefano Zampini #endif 35969d0448ceSStefano Zampini 359703e5aca4SStefano Zampini /* Return if some condition is not satisfied */ 359803e5aca4SStefano Zampini *F = NULL; 35999d0448ceSStefano Zampini PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 36009d0448ceSStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY) { 36019d0448ceSStefano Zampini IS *rows, *cols; 36029d0448ceSStefano Zampini PetscInt *m, *M; 36039d0448ceSStefano Zampini 36049d0448ceSStefano Zampini PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 36059d0448ceSStefano Zampini PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 36069d0448ceSStefano Zampini PetscCall(MatNestGetISs(A, rows, cols)); 36079d0448ceSStefano Zampini for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 360803e5aca4SStefano Zampini if (!flg) { 360903e5aca4SStefano Zampini PetscCall(PetscFree2(rows, cols)); 361003e5aca4SStefano Zampini PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. 
Use MAT_FACTOR_LU.\n")); 361103e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 361203e5aca4SStefano Zampini } 36139d0448ceSStefano Zampini PetscCall(PetscMalloc2(nr, &m, nr, &M)); 36149d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 36159d0448ceSStefano Zampini for (PetscInt r = 0; flg && r < nr; r++) 36169d0448ceSStefano Zampini for (PetscInt k = r + 1; flg && k < nr; k++) 36179d0448ceSStefano Zampini if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 36189d0448ceSStefano Zampini PetscCall(PetscFree2(m, M)); 36199d0448ceSStefano Zampini PetscCall(PetscFree2(rows, cols)); 362003e5aca4SStefano Zampini if (!flg) { 362103e5aca4SStefano Zampini PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 362203e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 362303e5aca4SStefano Zampini } 36249d0448ceSStefano Zampini } 36259d0448ceSStefano Zampini 36269d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 36279d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 36289d0448ceSStefano Zampini Mat sub = mats[r][c]; 36295d955bbbSStefano Zampini PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans; 36309d0448ceSStefano Zampini 36319d0448ceSStefano Zampini if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 36325d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 36335d955bbbSStefano Zampini if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 36345d955bbbSStefano Zampini else { 36355d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 36365d955bbbSStefano Zampini if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 36375d955bbbSStefano Zampini } 36389d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, 
MATSEQAIJ, &isSeqAIJ)); 36399d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 36409d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 36419d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 36429d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 36439d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 36449d0448ceSStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY) { 364503e5aca4SStefano Zampini if (r == c && !isSeqAIJ && !isMPIAIJ && !isSeqSBAIJ && !isMPISBAIJ) { 364603e5aca4SStefano Zampini PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 364703e5aca4SStefano Zampini flg = PETSC_FALSE; 364803e5aca4SStefano Zampini } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 364903e5aca4SStefano Zampini PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 365003e5aca4SStefano Zampini flg = PETSC_FALSE; 365103e5aca4SStefano Zampini } 365203e5aca4SStefano Zampini } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 365303e5aca4SStefano Zampini PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 365403e5aca4SStefano Zampini flg = PETSC_FALSE; 36559d0448ceSStefano Zampini } 36569d0448ceSStefano Zampini } 365703e5aca4SStefano Zampini } 365803e5aca4SStefano Zampini if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 36599d0448ceSStefano Zampini 36609d0448ceSStefano Zampini /* Create the factorization matrix */ 36619d0448ceSStefano Zampini PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 36629d0448ceSStefano Zampini PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, 
A->rmap->N, A->cmap->N)); 36639d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 36649d0448ceSStefano Zampini PetscCall(MatSetUp(B)); 36659d0448ceSStefano Zampini 36669d0448ceSStefano Zampini PetscCall(PetscNew(&mumps)); 36679d0448ceSStefano Zampini 36689d0448ceSStefano Zampini B->ops->view = MatView_MUMPS; 36699d0448ceSStefano Zampini B->ops->getinfo = MatGetInfo_MUMPS; 36709d0448ceSStefano Zampini 36719d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 36729d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 36739d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 36749d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 36759d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 36769d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 36779d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 36789d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 36799d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 36809d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 36819d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 36829d0448ceSStefano Zampini 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 36839d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 36849d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 36859d0448ceSStefano Zampini 36869d0448ceSStefano Zampini if (ftype == MAT_FACTOR_LU) { 36879d0448ceSStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 36889d0448ceSStefano Zampini B->factortype = MAT_FACTOR_LU; 36899d0448ceSStefano Zampini mumps->sym = 0; 36909d0448ceSStefano Zampini } else { 36919d0448ceSStefano Zampini B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 36929d0448ceSStefano Zampini B->factortype = MAT_FACTOR_CHOLESKY; 36939d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX) 36949d0448ceSStefano Zampini mumps->sym = 2; 36959d0448ceSStefano Zampini #else 36969d0448ceSStefano Zampini if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 36979d0448ceSStefano Zampini else mumps->sym = 2; 36989d0448ceSStefano Zampini #endif 36999d0448ceSStefano Zampini } 37009d0448ceSStefano Zampini mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 37019d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 37029d0448ceSStefano Zampini 37039d0448ceSStefano Zampini PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 37049d0448ceSStefano Zampini if (size == 1) { 37059d0448ceSStefano Zampini /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 37069d0448ceSStefano Zampini B->canuseordering = PETSC_TRUE; 37079d0448ceSStefano Zampini } 37089d0448ceSStefano Zampini 37099d0448ceSStefano Zampini /* set solvertype */ 37109d0448ceSStefano Zampini PetscCall(PetscFree(B->solvertype)); 37119d0448ceSStefano Zampini 
PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 37129d0448ceSStefano Zampini B->ops->destroy = MatDestroy_MUMPS; 37139d0448ceSStefano Zampini B->data = (void *)mumps; 37149d0448ceSStefano Zampini 37159d0448ceSStefano Zampini *F = B; 37169d0448ceSStefano Zampini mumps->id.job = JOB_NULL; 37179d0448ceSStefano Zampini mumps->ICNTL_pre = NULL; 37189d0448ceSStefano Zampini mumps->CNTL_pre = NULL; 37199d0448ceSStefano Zampini mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 37209d0448ceSStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 37219d0448ceSStefano Zampini } 37229d0448ceSStefano Zampini 3723d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3724d71ae5a4SJacob Faibussowitsch { 372542c9c57cSBarry Smith PetscFunctionBegin; 37269566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 37279566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 37289566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 37299566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 37309566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 37319566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 37329566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 37339566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 37349566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, 
MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 37359566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 37369566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 37379d0448ceSStefano Zampini PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 37389d0448ceSStefano Zampini PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 37393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 374042c9c57cSBarry Smith } 3741