11c2a3de1SBarry Smith 2397b6df1SKris Buschelman /* 3c2b5dc30SHong Zhang Provides an interface to the MUMPS sparse solver 4397b6df1SKris Buschelman */ 567602552SJunchao Zhang #include <petscpkg_version.h> 6c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 87ee00b23SStefano Zampini #include <../src/mat/impls/sell/mpi/mpisell.h> 9397b6df1SKris Buschelman 10397b6df1SKris Buschelman EXTERN_C_BEGIN 11397b6df1SKris Buschelman #if defined(PETSC_USE_COMPLEX) 122907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 132907cef9SHong Zhang #include <cmumps_c.h> 142907cef9SHong Zhang #else 15c6db04a5SJed Brown #include <zmumps_c.h> 162907cef9SHong Zhang #endif 172907cef9SHong Zhang #else 182907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 192907cef9SHong Zhang #include <smumps_c.h> 20397b6df1SKris Buschelman #else 21c6db04a5SJed Brown #include <dmumps_c.h> 22397b6df1SKris Buschelman #endif 232907cef9SHong Zhang #endif 24397b6df1SKris Buschelman EXTERN_C_END 25397b6df1SKris Buschelman #define JOB_INIT -1 26413bcc21SPierre Jolivet #define JOB_NULL 0 273d472b54SHong Zhang #define JOB_FACTSYMBOLIC 1 283d472b54SHong Zhang #define JOB_FACTNUMERIC 2 293d472b54SHong Zhang #define JOB_SOLVE 3 30397b6df1SKris Buschelman #define JOB_END -2 313d472b54SHong Zhang 322907cef9SHong Zhang /* calls to MUMPS */ 332907cef9SHong Zhang #if defined(PETSC_USE_COMPLEX) 342907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 353ab56b82SJunchao Zhang #define MUMPS_c cmumps_c 362907cef9SHong Zhang #else 373ab56b82SJunchao Zhang #define MUMPS_c zmumps_c 382907cef9SHong Zhang #endif 392907cef9SHong Zhang #else 402907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 413ab56b82SJunchao Zhang #define MUMPS_c smumps_c 422907cef9SHong Zhang #else 433ab56b82SJunchao Zhang #define MUMPS_c dmumps_c 442907cef9SHong Zhang #endif 452907cef9SHong Zhang #endif 462907cef9SHong Zhang 47a6053eceSJunchao Zhang /* 
MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 48a6053eceSJunchao Zhang number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 49a6053eceSJunchao Zhang naming convention in PetscMPIInt, PetscBLASInt etc. 50a6053eceSJunchao Zhang */ 51a6053eceSJunchao Zhang typedef MUMPS_INT PetscMUMPSInt; 52a6053eceSJunchao Zhang 5367602552SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 5467602552SJunchao Zhang #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 55a6053eceSJunchao Zhang #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 5667602552SJunchao Zhang #endif 57a6053eceSJunchao Zhang #else 5867602552SJunchao Zhang #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 5967602552SJunchao Zhang #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 6067602552SJunchao Zhang #endif 6167602552SJunchao Zhang #endif 6267602552SJunchao Zhang 63a6053eceSJunchao Zhang #define MPIU_MUMPSINT MPI_INT 64a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MAX 2147483647 65a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MIN -2147483648 66a6053eceSJunchao Zhang 67a6053eceSJunchao Zhang /* Cast PetscInt to PetscMUMPSInt. 
Usually there is no overflow since <a> is row/col indices or some small integers*/ 68d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 69d71ae5a4SJacob Faibussowitsch { 70a6053eceSJunchao Zhang PetscFunctionBegin; 71ece88022SPierre Jolivet #if PetscDefined(USE_64BIT_INDICES) 722c71b3e2SJacob Faibussowitsch PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 73ece88022SPierre Jolivet #endif 74a6053eceSJunchao Zhang *b = (PetscMUMPSInt)(a); 753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 76a6053eceSJunchao Zhang } 77a6053eceSJunchao Zhang 78a6053eceSJunchao Zhang /* Put these utility routines here since they are only used in this file */ 79d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 80d71ae5a4SJacob Faibussowitsch { 81a6053eceSJunchao Zhang PetscInt myval; 82a6053eceSJunchao Zhang PetscBool myset; 83a6053eceSJunchao Zhang PetscFunctionBegin; 84a6053eceSJunchao Zhang /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 859566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 869566063dSJacob Faibussowitsch if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 87a6053eceSJunchao Zhang if (set) *set = myset; 883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 89a6053eceSJunchao Zhang } 90a6053eceSJunchao Zhang #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 91a6053eceSJunchao Zhang 92217d3b1eSJunchao Zhang /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 933ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 943ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 953ab56b82SJunchao Zhang do { \ 963ab56b82SJunchao Zhang if (mumps->use_petsc_omp_support) { \ 973ab56b82SJunchao Zhang if (mumps->is_omp_master) { \ 989566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 9914ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 10014ffdc6fSStefano Zampini PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 10114ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 1029566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 1033ab56b82SJunchao Zhang } \ 1049566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 105c3714a1dSJunchao Zhang /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 106c3714a1dSJunchao Zhang to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 107c3714a1dSJunchao Zhang an easy translation between omp_comm and petsc_comm). 
See MUMPS-5.1.2 manual p82. \ 108c3714a1dSJunchao Zhang omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 109c3714a1dSJunchao Zhang */ \ 110*338d3105SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 111*338d3105SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 112*338d3105SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 113*338d3105SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 1143ab56b82SJunchao Zhang } else { \ 11514ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 11614ffdc6fSStefano Zampini PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 11714ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 1183ab56b82SJunchao Zhang } \ 1193ab56b82SJunchao Zhang } while (0) 1203ab56b82SJunchao Zhang #else 1213ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 122d71ae5a4SJacob Faibussowitsch do { \ 12314ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 12414ffdc6fSStefano Zampini PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 12514ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 126d71ae5a4SJacob Faibussowitsch } while (0) 1273ab56b82SJunchao Zhang #endif 1283ab56b82SJunchao Zhang 129940cd9d6SSatish Balay /* declare MumpsScalar */ 130940cd9d6SSatish Balay #if defined(PETSC_USE_COMPLEX) 131940cd9d6SSatish Balay #if defined(PETSC_USE_REAL_SINGLE) 132940cd9d6SSatish Balay #define MumpsScalar mumps_complex 133940cd9d6SSatish Balay #else 134940cd9d6SSatish Balay #define MumpsScalar mumps_double_complex 135940cd9d6SSatish Balay #endif 136940cd9d6SSatish Balay #else 
137940cd9d6SSatish Balay #define MumpsScalar PetscScalar 138940cd9d6SSatish Balay #endif 1393d472b54SHong Zhang 140397b6df1SKris Buschelman /* macros s.t. indices match MUMPS documentation */ 141397b6df1SKris Buschelman #define ICNTL(I) icntl[(I)-1] 142397b6df1SKris Buschelman #define CNTL(I) cntl[(I)-1] 143397b6df1SKris Buschelman #define INFOG(I) infog[(I)-1] 144a7aca84bSHong Zhang #define INFO(I) info[(I)-1] 145397b6df1SKris Buschelman #define RINFOG(I) rinfog[(I)-1] 146adc1d99fSHong Zhang #define RINFO(I) rinfo[(I)-1] 147397b6df1SKris Buschelman 148a6053eceSJunchao Zhang typedef struct Mat_MUMPS Mat_MUMPS; 149a6053eceSJunchao Zhang struct Mat_MUMPS { 150397b6df1SKris Buschelman #if defined(PETSC_USE_COMPLEX) 1512907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 1522907cef9SHong Zhang CMUMPS_STRUC_C id; 1532907cef9SHong Zhang #else 154397b6df1SKris Buschelman ZMUMPS_STRUC_C id; 1552907cef9SHong Zhang #endif 1562907cef9SHong Zhang #else 1572907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 1582907cef9SHong Zhang SMUMPS_STRUC_C id; 159397b6df1SKris Buschelman #else 160397b6df1SKris Buschelman DMUMPS_STRUC_C id; 161397b6df1SKris Buschelman #endif 1622907cef9SHong Zhang #endif 1632907cef9SHong Zhang 164397b6df1SKris Buschelman MatStructure matstruc; 1652d4298aeSJunchao Zhang PetscMPIInt myid, petsc_size; 166a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 167a6053eceSJunchao Zhang PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 168a6053eceSJunchao Zhang PetscInt64 nnz; /* number of nonzeros. 
The type is called selective 64-bit in mumps */ 169a6053eceSJunchao Zhang PetscMUMPSInt sym; 1702d4298aeSJunchao Zhang MPI_Comm mumps_comm; 171413bcc21SPierre Jolivet PetscMUMPSInt *ICNTL_pre; 172413bcc21SPierre Jolivet PetscReal *CNTL_pre; 173a6053eceSJunchao Zhang PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 174801fbe65SHong Zhang VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 17525aac85cSJunchao Zhang PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 17667602552SJunchao Zhang PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 17767602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 17867602552SJunchao Zhang PetscInt *rhs_nrow, max_nrhs; 17967602552SJunchao Zhang PetscMPIInt *rhs_recvcounts, *rhs_disps; 18067602552SJunchao Zhang PetscScalar *rhs_loc, *rhs_recvbuf; 18167602552SJunchao Zhang #endif 182801fbe65SHong Zhang Vec b_seq, x_seq; 183a6053eceSJunchao Zhang PetscInt ninfo, *info; /* which INFO to display */ 184b5fa320bSStefano Zampini PetscInt sizeredrhs; 18559ac8732SStefano Zampini PetscScalar *schur_sol; 18659ac8732SStefano Zampini PetscInt schur_sizesol; 187a6053eceSJunchao Zhang PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 188a6053eceSJunchao Zhang PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 189a6053eceSJunchao Zhang PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 1902205254eSKarl Rupp 191a6053eceSJunchao Zhang /* stuff used by petsc/mumps OpenMP support*/ 1923ab56b82SJunchao Zhang PetscBool use_petsc_omp_support; 193da81f932SPierre Jolivet PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 1943ab56b82SJunchao Zhang MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 195a6053eceSJunchao Zhang PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 
196a6053eceSJunchao Zhang PetscMPIInt tag, omp_comm_size; 1973ab56b82SJunchao Zhang PetscBool is_omp_master; /* is this rank the master of omp_comm */ 198a6053eceSJunchao Zhang MPI_Request *reqs; 199a6053eceSJunchao Zhang }; 2003ab56b82SJunchao Zhang 201a6053eceSJunchao Zhang /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 202a6053eceSJunchao Zhang Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 203a6053eceSJunchao Zhang */ 204d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 205d71ae5a4SJacob Faibussowitsch { 206a6053eceSJunchao Zhang PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 207f0c56d0fSKris Buschelman 208a6053eceSJunchao Zhang PetscFunctionBegin; 209a6053eceSJunchao Zhang #if defined(PETSC_USE_64BIT_INDICES) 210a6053eceSJunchao Zhang { 211a6053eceSJunchao Zhang PetscInt i; 212a6053eceSJunchao Zhang if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 2139566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 2149566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 215a6053eceSJunchao Zhang mumps->cur_ilen = nrow + 1; 216a6053eceSJunchao Zhang } 217a6053eceSJunchao Zhang if (nnz > mumps->cur_jlen) { 2189566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 2199566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 220a6053eceSJunchao Zhang mumps->cur_jlen = nnz; 221a6053eceSJunchao Zhang } 2229566063dSJacob Faibussowitsch for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 2239566063dSJacob Faibussowitsch for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], 
&(mumps->ja_alloc[i]))); 224a6053eceSJunchao Zhang *ia_mumps = mumps->ia_alloc; 225a6053eceSJunchao Zhang *ja_mumps = mumps->ja_alloc; 226a6053eceSJunchao Zhang } 227a6053eceSJunchao Zhang #else 228a6053eceSJunchao Zhang *ia_mumps = ia; 229a6053eceSJunchao Zhang *ja_mumps = ja; 230a6053eceSJunchao Zhang #endif 2319566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 2323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 233a6053eceSJunchao Zhang } 234b24902e0SBarry Smith 235d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 236d71ae5a4SJacob Faibussowitsch { 237b5fa320bSStefano Zampini PetscFunctionBegin; 2389566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.listvar_schur)); 2399566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 2409566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 24159ac8732SStefano Zampini mumps->id.size_schur = 0; 242b3cb21ddSStefano Zampini mumps->id.schur_lld = 0; 24359ac8732SStefano Zampini mumps->id.ICNTL(19) = 0; 2443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24559ac8732SStefano Zampini } 24659ac8732SStefano Zampini 247b3cb21ddSStefano Zampini /* solve with rhs in mumps->id.redrhs and return in the same location */ 248d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 249d71ae5a4SJacob Faibussowitsch { 250b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 251b3cb21ddSStefano Zampini Mat S, B, X; 252b3cb21ddSStefano Zampini MatFactorSchurStatus schurstatus; 253b3cb21ddSStefano Zampini PetscInt sizesol; 25459ac8732SStefano Zampini 25559ac8732SStefano Zampini PetscFunctionBegin; 2569566063dSJacob Faibussowitsch PetscCall(MatFactorFactorizeSchurComplement(F)); 2579566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 2589566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 
mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 2599566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 260a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2619566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(B, S->boundtocpu)); 262a3d589ffSStefano Zampini #endif 263b3cb21ddSStefano Zampini switch (schurstatus) { 264d71ae5a4SJacob Faibussowitsch case MAT_FACTOR_SCHUR_FACTORED: 265d71ae5a4SJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 266d71ae5a4SJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 267a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2689566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, S->boundtocpu)); 269a3d589ffSStefano Zampini #endif 270b3cb21ddSStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 2719566063dSJacob Faibussowitsch PetscCall(MatMatSolveTranspose(S, B, X)); 27259ac8732SStefano Zampini } else { 2739566063dSJacob Faibussowitsch PetscCall(MatMatSolve(S, B, X)); 27459ac8732SStefano Zampini } 275b3cb21ddSStefano Zampini break; 276b3cb21ddSStefano Zampini case MAT_FACTOR_SCHUR_INVERTED: 277b3cb21ddSStefano Zampini sizesol = mumps->id.nrhs * mumps->id.size_schur; 27859ac8732SStefano Zampini if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 2799566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 2809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 28159ac8732SStefano Zampini mumps->schur_sizesol = sizesol; 282b5fa320bSStefano Zampini } 2839566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 2849566063dSJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 285a3d589ffSStefano Zampini #if 
defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2869566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, S->boundtocpu)); 287a3d589ffSStefano Zampini #endif 2889566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 28959ac8732SStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 2909566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 291b5fa320bSStefano Zampini } else { 2929566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 293b5fa320bSStefano Zampini } 2949566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(X)); 2959566063dSJacob Faibussowitsch PetscCall(MatProductSymbolic(X)); 2969566063dSJacob Faibussowitsch PetscCall(MatProductNumeric(X)); 2974417c5e8SHong Zhang 2989566063dSJacob Faibussowitsch PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 299b3cb21ddSStefano Zampini break; 300d71ae5a4SJacob Faibussowitsch default: 301d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 30259ac8732SStefano Zampini } 3039566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 3049566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 3059566063dSJacob Faibussowitsch PetscCall(MatDestroy(&X)); 3063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 307b5fa320bSStefano Zampini } 308b5fa320bSStefano Zampini 309d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 310d71ae5a4SJacob Faibussowitsch { 311b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 312b5fa320bSStefano Zampini 313b5fa320bSStefano Zampini PetscFunctionBegin; 314b5fa320bSStefano Zampini if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 3153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 316b5fa320bSStefano Zampini } 317b8f61ee1SStefano Zampini 
if (!expansion) { /* prepare for the condensation step */ 318b5fa320bSStefano Zampini PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 319b5fa320bSStefano Zampini /* allocate MUMPS internal array to store reduced right-hand sides */ 320b5fa320bSStefano Zampini if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 3219566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 322b5fa320bSStefano Zampini mumps->id.lredrhs = mumps->id.size_schur; 3239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 324b5fa320bSStefano Zampini mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 325b5fa320bSStefano Zampini } 326b5fa320bSStefano Zampini mumps->id.ICNTL(26) = 1; /* condensation phase */ 327b5fa320bSStefano Zampini } else { /* prepare for the expansion step */ 328b8f61ee1SStefano Zampini /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 3299566063dSJacob Faibussowitsch PetscCall(MatMumpsSolveSchur_Private(F)); 330b5fa320bSStefano Zampini mumps->id.ICNTL(26) = 2; /* expansion phase */ 3313ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 33208401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 333b5fa320bSStefano Zampini /* restore defaults */ 334b5fa320bSStefano Zampini mumps->id.ICNTL(26) = -1; 335d3d598ffSStefano Zampini /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 336d3d598ffSStefano Zampini if (mumps->id.nrhs > 1) { 3379566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 338d3d598ffSStefano Zampini mumps->id.lredrhs = 0; 339d3d598ffSStefano Zampini mumps->sizeredrhs = 0; 340d3d598ffSStefano Zampini } 341b5fa320bSStefano Zampini } 3423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 343b5fa320bSStefano Zampini } 344b5fa320bSStefano Zampini 
345397b6df1SKris Buschelman /* 346d341cd04SHong Zhang MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 347d341cd04SHong Zhang 348397b6df1SKris Buschelman input: 34975480915SPierre Jolivet A - matrix in aij,baij or sbaij format 350397b6df1SKris Buschelman shift - 0: C style output triple; 1: Fortran style output triple. 351bccb9932SShri Abhyankar reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 352bccb9932SShri Abhyankar MAT_REUSE_MATRIX: only the values in v array are updated 353397b6df1SKris Buschelman output: 354397b6df1SKris Buschelman nnz - dim of r, c, and v (number of local nonzero entries of A) 355397b6df1SKris Buschelman r, c, v - row and col index, matrix values (matrix triples) 356eb9baa12SBarry Smith 357eb9baa12SBarry Smith The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 3587ee00b23SStefano Zampini freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 359eb9baa12SBarry Smith that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
360eb9baa12SBarry Smith 361397b6df1SKris Buschelman */ 36216ebf90aSShri Abhyankar 363d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 364d71ae5a4SJacob Faibussowitsch { 365a3d589ffSStefano Zampini const PetscScalar *av; 366185f6596SHong Zhang const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 367a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j, k; 368a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 36916ebf90aSShri Abhyankar Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 370397b6df1SKris Buschelman 371397b6df1SKris Buschelman PetscFunctionBegin; 3729566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 373a6053eceSJunchao Zhang mumps->val = (PetscScalar *)av; 374bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 3752205254eSKarl Rupp nz = aa->nz; 3762205254eSKarl Rupp ai = aa->i; 3772205254eSKarl Rupp aj = aa->j; 3789566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 379a6053eceSJunchao Zhang for (i = k = 0; i < M; i++) { 38016ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 38167877ebaSShri Abhyankar ajj = aj + ai[i]; 38267877ebaSShri Abhyankar for (j = 0; j < rnz; j++) { 3839566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 3849566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 385a6053eceSJunchao Zhang k++; 38616ebf90aSShri Abhyankar } 38716ebf90aSShri Abhyankar } 388a6053eceSJunchao Zhang mumps->irn = row; 389a6053eceSJunchao Zhang mumps->jcn = col; 390a6053eceSJunchao Zhang mumps->nnz = nz; 39116ebf90aSShri Abhyankar } 3929566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 3933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 39416ebf90aSShri Abhyankar } 395397b6df1SKris Buschelman 396d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 397d71ae5a4SJacob 
Faibussowitsch { 398a6053eceSJunchao Zhang PetscInt64 nz, i, j, k, r; 3997ee00b23SStefano Zampini Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 400a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 4017ee00b23SStefano Zampini 4027ee00b23SStefano Zampini PetscFunctionBegin; 403a6053eceSJunchao Zhang mumps->val = a->val; 4047ee00b23SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 4057ee00b23SStefano Zampini nz = a->sliidx[a->totalslices]; 4069566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 407a6053eceSJunchao Zhang for (i = k = 0; i < a->totalslices; i++) { 40848a46eb9SPierre Jolivet for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 4097ee00b23SStefano Zampini } 4109566063dSJacob Faibussowitsch for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 411a6053eceSJunchao Zhang mumps->irn = row; 412a6053eceSJunchao Zhang mumps->jcn = col; 413a6053eceSJunchao Zhang mumps->nnz = nz; 4147ee00b23SStefano Zampini } 4153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4167ee00b23SStefano Zampini } 4177ee00b23SStefano Zampini 418d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 419d71ae5a4SJacob Faibussowitsch { 42067877ebaSShri Abhyankar Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 42133d57670SJed Brown const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 422a6053eceSJunchao Zhang PetscInt64 M, nz, idx = 0, rnz, i, j, k, m; 423a6053eceSJunchao Zhang PetscInt bs; 424a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 42567877ebaSShri Abhyankar 42667877ebaSShri Abhyankar PetscFunctionBegin; 4279566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 42833d57670SJed Brown M = A->rmap->N / bs; 429a6053eceSJunchao Zhang mumps->val = aa->a; 430bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 4319371c9d4SSatish Balay ai = aa->i; 
4329371c9d4SSatish Balay aj = aa->j; 43367877ebaSShri Abhyankar nz = bs2 * aa->nz; 4349566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 43567877ebaSShri Abhyankar for (i = 0; i < M; i++) { 43667877ebaSShri Abhyankar ajj = aj + ai[i]; 43767877ebaSShri Abhyankar rnz = ai[i + 1] - ai[i]; 43867877ebaSShri Abhyankar for (k = 0; k < rnz; k++) { 43967877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 44067877ebaSShri Abhyankar for (m = 0; m < bs; m++) { 4419566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 4429566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 443a6053eceSJunchao Zhang idx++; 44467877ebaSShri Abhyankar } 44567877ebaSShri Abhyankar } 44667877ebaSShri Abhyankar } 44767877ebaSShri Abhyankar } 448a6053eceSJunchao Zhang mumps->irn = row; 449a6053eceSJunchao Zhang mumps->jcn = col; 450a6053eceSJunchao Zhang mumps->nnz = nz; 45167877ebaSShri Abhyankar } 4523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 45367877ebaSShri Abhyankar } 45467877ebaSShri Abhyankar 455d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 456d71ae5a4SJacob Faibussowitsch { 45775480915SPierre Jolivet const PetscInt *ai, *aj, *ajj; 458a6053eceSJunchao Zhang PetscInt bs; 459a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j, k, m; 460a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 46175480915SPierre Jolivet PetscScalar *val; 46216ebf90aSShri Abhyankar Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 46375480915SPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 46438548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 465b94d7dedSBarry Smith PetscBool isset, hermitian; 46638548759SBarry Smith #endif 46716ebf90aSShri Abhyankar 46816ebf90aSShri Abhyankar PetscFunctionBegin; 46938548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 470b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, 
&isset, &hermitian)); 471b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 47238548759SBarry Smith #endif 4732205254eSKarl Rupp ai = aa->i; 4742205254eSKarl Rupp aj = aa->j; 4759566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 47675480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 477f3fa974cSJacob Faibussowitsch const PetscInt64 alloc_size = aa->nz * bs2; 478f3fa974cSJacob Faibussowitsch 479f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 480a6053eceSJunchao Zhang if (bs > 1) { 481f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 482a6053eceSJunchao Zhang mumps->val = mumps->val_alloc; 48375480915SPierre Jolivet } else { 484a6053eceSJunchao Zhang mumps->val = aa->a; 48575480915SPierre Jolivet } 486a6053eceSJunchao Zhang mumps->irn = row; 487a6053eceSJunchao Zhang mumps->jcn = col; 488a6053eceSJunchao Zhang } else { 489a6053eceSJunchao Zhang if (bs == 1) mumps->val = aa->a; 490a6053eceSJunchao Zhang row = mumps->irn; 491a6053eceSJunchao Zhang col = mumps->jcn; 492a6053eceSJunchao Zhang } 493a6053eceSJunchao Zhang val = mumps->val; 494185f6596SHong Zhang 49516ebf90aSShri Abhyankar nz = 0; 496a81fe166SPierre Jolivet if (bs > 1) { 49775480915SPierre Jolivet for (i = 0; i < mbs; i++) { 49816ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 49967877ebaSShri Abhyankar ajj = aj + ai[i]; 50075480915SPierre Jolivet for (j = 0; j < rnz; j++) { 50175480915SPierre Jolivet for (k = 0; k < bs; k++) { 50275480915SPierre Jolivet for (m = 0; m < bs; m++) { 503ec4f40fdSPierre Jolivet if (ajj[j] > i || k >= m) { 50475480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 5059566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 5069566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 
50775480915SPierre Jolivet } 50875480915SPierre Jolivet val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 50975480915SPierre Jolivet } 51075480915SPierre Jolivet } 51175480915SPierre Jolivet } 51275480915SPierre Jolivet } 51375480915SPierre Jolivet } 514a81fe166SPierre Jolivet } else if (reuse == MAT_INITIAL_MATRIX) { 515a81fe166SPierre Jolivet for (i = 0; i < mbs; i++) { 516a81fe166SPierre Jolivet rnz = ai[i + 1] - ai[i]; 517a81fe166SPierre Jolivet ajj = aj + ai[i]; 518a81fe166SPierre Jolivet for (j = 0; j < rnz; j++) { 5199566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 5209566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 521a6053eceSJunchao Zhang nz++; 522a81fe166SPierre Jolivet } 523a81fe166SPierre Jolivet } 52408401ef6SPierre Jolivet PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 52575480915SPierre Jolivet } 526a6053eceSJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 5273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 52816ebf90aSShri Abhyankar } 52916ebf90aSShri Abhyankar 530d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 531d71ae5a4SJacob Faibussowitsch { 53267877ebaSShri Abhyankar const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 533a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j; 53467877ebaSShri Abhyankar const PetscScalar *av, *v1; 53516ebf90aSShri Abhyankar PetscScalar *val; 536a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 537829b1710SHong Zhang Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 53829b521d4Sstefano_zampini PetscBool missing; 53938548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 540b94d7dedSBarry Smith PetscBool hermitian, isset; 54138548759SBarry Smith #endif 54216ebf90aSShri Abhyankar 54316ebf90aSShri Abhyankar PetscFunctionBegin; 54438548759SBarry Smith 
#if defined(PETSC_USE_COMPLEX) 545b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 546b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 54738548759SBarry Smith #endif 5489566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 5499371c9d4SSatish Balay ai = aa->i; 5509371c9d4SSatish Balay aj = aa->j; 55116ebf90aSShri Abhyankar adiag = aa->diag; 5529566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 553bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 5547ee00b23SStefano Zampini /* count nz in the upper triangular part of A */ 555829b1710SHong Zhang nz = 0; 55629b521d4Sstefano_zampini if (missing) { 55729b521d4Sstefano_zampini for (i = 0; i < M; i++) { 55829b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 55929b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 56029b521d4Sstefano_zampini if (aj[j] < i) continue; 56129b521d4Sstefano_zampini nz++; 56229b521d4Sstefano_zampini } 56329b521d4Sstefano_zampini } else { 56429b521d4Sstefano_zampini nz += ai[i + 1] - adiag[i]; 56529b521d4Sstefano_zampini } 56629b521d4Sstefano_zampini } 56729b521d4Sstefano_zampini } else { 568829b1710SHong Zhang for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 56929b521d4Sstefano_zampini } 5709566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 5719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 572a6053eceSJunchao Zhang mumps->nnz = nz; 573a6053eceSJunchao Zhang mumps->irn = row; 574a6053eceSJunchao Zhang mumps->jcn = col; 575a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 576185f6596SHong Zhang 57716ebf90aSShri Abhyankar nz = 0; 57829b521d4Sstefano_zampini if (missing) { 57929b521d4Sstefano_zampini for (i = 0; i < M; i++) { 58029b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 
58129b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 58229b521d4Sstefano_zampini if (aj[j] < i) continue; 5839566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 5849566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 58529b521d4Sstefano_zampini val[nz] = av[j]; 58629b521d4Sstefano_zampini nz++; 58729b521d4Sstefano_zampini } 58829b521d4Sstefano_zampini } else { 58929b521d4Sstefano_zampini rnz = ai[i + 1] - adiag[i]; 59029b521d4Sstefano_zampini ajj = aj + adiag[i]; 59129b521d4Sstefano_zampini v1 = av + adiag[i]; 59229b521d4Sstefano_zampini for (j = 0; j < rnz; j++) { 5939566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 5949566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 595a6053eceSJunchao Zhang val[nz++] = v1[j]; 59629b521d4Sstefano_zampini } 59729b521d4Sstefano_zampini } 59829b521d4Sstefano_zampini } 59929b521d4Sstefano_zampini } else { 60016ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 60116ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 60267877ebaSShri Abhyankar ajj = aj + adiag[i]; 603cf3759fdSShri Abhyankar v1 = av + adiag[i]; 60467877ebaSShri Abhyankar for (j = 0; j < rnz; j++) { 6059566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 6069566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 607a6053eceSJunchao Zhang val[nz++] = v1[j]; 60816ebf90aSShri Abhyankar } 60916ebf90aSShri Abhyankar } 61029b521d4Sstefano_zampini } 611397b6df1SKris Buschelman } else { 612a6053eceSJunchao Zhang nz = 0; 613a6053eceSJunchao Zhang val = mumps->val; 61429b521d4Sstefano_zampini if (missing) { 61516ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 61629b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 61729b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 61829b521d4Sstefano_zampini if (aj[j] < i) continue; 61929b521d4Sstefano_zampini val[nz++] = 
av[j]; 62029b521d4Sstefano_zampini } 62129b521d4Sstefano_zampini } else { 62216ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 62367877ebaSShri Abhyankar v1 = av + adiag[i]; 624ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 62516ebf90aSShri Abhyankar } 62616ebf90aSShri Abhyankar } 62729b521d4Sstefano_zampini } else { 62816ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 62916ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 63016ebf90aSShri Abhyankar v1 = av + adiag[i]; 631ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 63216ebf90aSShri Abhyankar } 63316ebf90aSShri Abhyankar } 63429b521d4Sstefano_zampini } 6359566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 6363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 63716ebf90aSShri Abhyankar } 63816ebf90aSShri Abhyankar 639d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 640d71ae5a4SJacob Faibussowitsch { 641a6053eceSJunchao Zhang const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 642a6053eceSJunchao Zhang PetscInt bs; 643a6053eceSJunchao Zhang PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 644a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 64516ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 64616ebf90aSShri Abhyankar PetscScalar *val; 647397b6df1SKris Buschelman Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 648397b6df1SKris Buschelman Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 649397b6df1SKris Buschelman Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 650ec4f40fdSPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 65138548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 652b94d7dedSBarry Smith PetscBool hermitian, isset; 65338548759SBarry Smith #endif 65416ebf90aSShri Abhyankar 65516ebf90aSShri Abhyankar PetscFunctionBegin; 65638548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 657b94d7dedSBarry 
Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 658b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 65938548759SBarry Smith #endif 6609566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 66138548759SBarry Smith rstart = A->rmap->rstart; 66238548759SBarry Smith ai = aa->i; 66338548759SBarry Smith aj = aa->j; 66438548759SBarry Smith bi = bb->i; 66538548759SBarry Smith bj = bb->j; 66638548759SBarry Smith av = aa->a; 66738548759SBarry Smith bv = bb->a; 668397b6df1SKris Buschelman 6692205254eSKarl Rupp garray = mat->garray; 6702205254eSKarl Rupp 671bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 672a6053eceSJunchao Zhang nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 6739566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 6749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 675a6053eceSJunchao Zhang /* can not decide the exact mumps->nnz now because of the SBAIJ */ 676a6053eceSJunchao Zhang mumps->irn = row; 677a6053eceSJunchao Zhang mumps->jcn = col; 678a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 679397b6df1SKris Buschelman } else { 680a6053eceSJunchao Zhang val = mumps->val; 681397b6df1SKris Buschelman } 682397b6df1SKris Buschelman 6839371c9d4SSatish Balay jj = 0; 6849371c9d4SSatish Balay irow = rstart; 685ec4f40fdSPierre Jolivet for (i = 0; i < mbs; i++) { 686397b6df1SKris Buschelman ajj = aj + ai[i]; /* ptr to the beginning of this row */ 687397b6df1SKris Buschelman countA = ai[i + 1] - ai[i]; 688397b6df1SKris Buschelman countB = bi[i + 1] - bi[i]; 689397b6df1SKris Buschelman bjj = bj + bi[i]; 690ec4f40fdSPierre Jolivet v1 = av + ai[i] * bs2; 691ec4f40fdSPierre Jolivet v2 = bv + bi[i] * bs2; 692397b6df1SKris Buschelman 693ec4f40fdSPierre Jolivet if (bs > 1) { 694ec4f40fdSPierre Jolivet /* A-part */ 695ec4f40fdSPierre Jolivet for (j = 0; j < 
countA; j++) { 696ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 697ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 698ec4f40fdSPierre Jolivet if (rstart + ajj[j] * bs > irow || k >= m) { 699ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 7009566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 7019566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 702ec4f40fdSPierre Jolivet } 703ec4f40fdSPierre Jolivet val[jj++] = v1[j * bs2 + m + k * bs]; 704ec4f40fdSPierre Jolivet } 705ec4f40fdSPierre Jolivet } 706ec4f40fdSPierre Jolivet } 707ec4f40fdSPierre Jolivet } 708ec4f40fdSPierre Jolivet 709ec4f40fdSPierre Jolivet /* B-part */ 710ec4f40fdSPierre Jolivet for (j = 0; j < countB; j++) { 711ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 712ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 713ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 7149566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 7159566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 716ec4f40fdSPierre Jolivet } 717ec4f40fdSPierre Jolivet val[jj++] = v2[j * bs2 + m + k * bs]; 718ec4f40fdSPierre Jolivet } 719ec4f40fdSPierre Jolivet } 720ec4f40fdSPierre Jolivet } 721ec4f40fdSPierre Jolivet } else { 722397b6df1SKris Buschelman /* A-part */ 723397b6df1SKris Buschelman for (j = 0; j < countA; j++) { 724bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7259566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 7269566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 727397b6df1SKris Buschelman } 72816ebf90aSShri Abhyankar val[jj++] = v1[j]; 729397b6df1SKris Buschelman } 73016ebf90aSShri Abhyankar 73116ebf90aSShri Abhyankar /* B-part */ 73216ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 733bccb9932SShri Abhyankar if (reuse == 
MAT_INITIAL_MATRIX) { 7349566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 7359566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 736397b6df1SKris Buschelman } 73716ebf90aSShri Abhyankar val[jj++] = v2[j]; 73816ebf90aSShri Abhyankar } 73916ebf90aSShri Abhyankar } 740ec4f40fdSPierre Jolivet irow += bs; 741ec4f40fdSPierre Jolivet } 742a6053eceSJunchao Zhang mumps->nnz = jj; 7433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 74416ebf90aSShri Abhyankar } 74516ebf90aSShri Abhyankar 746d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 747d71ae5a4SJacob Faibussowitsch { 74816ebf90aSShri Abhyankar const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 749a6053eceSJunchao Zhang PetscInt64 rstart, nz, i, j, jj, irow, countA, countB; 750a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 75116ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 75216ebf90aSShri Abhyankar PetscScalar *val; 753a3d589ffSStefano Zampini Mat Ad, Ao; 754a3d589ffSStefano Zampini Mat_SeqAIJ *aa; 755a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 75616ebf90aSShri Abhyankar 75716ebf90aSShri Abhyankar PetscFunctionBegin; 7589566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 7599566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 7609566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 761a3d589ffSStefano Zampini 762a3d589ffSStefano Zampini aa = (Mat_SeqAIJ *)(Ad)->data; 763a3d589ffSStefano Zampini bb = (Mat_SeqAIJ *)(Ao)->data; 76438548759SBarry Smith ai = aa->i; 76538548759SBarry Smith aj = aa->j; 76638548759SBarry Smith bi = bb->i; 76738548759SBarry Smith bj = bb->j; 76816ebf90aSShri Abhyankar 769a3d589ffSStefano Zampini rstart = A->rmap->rstart; 7702205254eSKarl Rupp 771bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 
772a6053eceSJunchao Zhang nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 7739566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 7749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 775a6053eceSJunchao Zhang mumps->nnz = nz; 776a6053eceSJunchao Zhang mumps->irn = row; 777a6053eceSJunchao Zhang mumps->jcn = col; 778a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 77916ebf90aSShri Abhyankar } else { 780a6053eceSJunchao Zhang val = mumps->val; 78116ebf90aSShri Abhyankar } 78216ebf90aSShri Abhyankar 7839371c9d4SSatish Balay jj = 0; 7849371c9d4SSatish Balay irow = rstart; 78516ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 78616ebf90aSShri Abhyankar ajj = aj + ai[i]; /* ptr to the beginning of this row */ 78716ebf90aSShri Abhyankar countA = ai[i + 1] - ai[i]; 78816ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 78916ebf90aSShri Abhyankar bjj = bj + bi[i]; 79016ebf90aSShri Abhyankar v1 = av + ai[i]; 79116ebf90aSShri Abhyankar v2 = bv + bi[i]; 79216ebf90aSShri Abhyankar 79316ebf90aSShri Abhyankar /* A-part */ 79416ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 795bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7969566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 7979566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 79816ebf90aSShri Abhyankar } 79916ebf90aSShri Abhyankar val[jj++] = v1[j]; 80016ebf90aSShri Abhyankar } 80116ebf90aSShri Abhyankar 80216ebf90aSShri Abhyankar /* B-part */ 80316ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 804bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8059566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 8069566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 80716ebf90aSShri Abhyankar } 80816ebf90aSShri Abhyankar val[jj++] = v2[j]; 80916ebf90aSShri Abhyankar } 
81016ebf90aSShri Abhyankar irow++; 81116ebf90aSShri Abhyankar } 8129566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 8139566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 8143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 81516ebf90aSShri Abhyankar } 81616ebf90aSShri Abhyankar 817d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 818d71ae5a4SJacob Faibussowitsch { 81967877ebaSShri Abhyankar Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 82067877ebaSShri Abhyankar Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 82167877ebaSShri Abhyankar Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 82267877ebaSShri Abhyankar const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 823d985c460SShri Abhyankar const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart; 82433d57670SJed Brown const PetscInt bs2 = mat->bs2; 825a6053eceSJunchao Zhang PetscInt bs; 826a6053eceSJunchao Zhang PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 827a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 82867877ebaSShri Abhyankar const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 82967877ebaSShri Abhyankar PetscScalar *val; 83067877ebaSShri Abhyankar 83167877ebaSShri Abhyankar PetscFunctionBegin; 8329566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 833bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 83467877ebaSShri Abhyankar nz = bs2 * (aa->nz + bb->nz); 8359566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 8369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 837a6053eceSJunchao Zhang mumps->nnz = nz; 838a6053eceSJunchao Zhang mumps->irn = row; 839a6053eceSJunchao Zhang mumps->jcn = col; 840a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 84167877ebaSShri Abhyankar } else { 842a6053eceSJunchao Zhang val = mumps->val; 
84367877ebaSShri Abhyankar } 84467877ebaSShri Abhyankar 8459371c9d4SSatish Balay jj = 0; 8469371c9d4SSatish Balay irow = rstart; 84767877ebaSShri Abhyankar for (i = 0; i < mbs; i++) { 84867877ebaSShri Abhyankar countA = ai[i + 1] - ai[i]; 84967877ebaSShri Abhyankar countB = bi[i + 1] - bi[i]; 85067877ebaSShri Abhyankar ajj = aj + ai[i]; 85167877ebaSShri Abhyankar bjj = bj + bi[i]; 85267877ebaSShri Abhyankar v1 = av + bs2 * ai[i]; 85367877ebaSShri Abhyankar v2 = bv + bs2 * bi[i]; 85467877ebaSShri Abhyankar 85567877ebaSShri Abhyankar idx = 0; 85667877ebaSShri Abhyankar /* A-part */ 85767877ebaSShri Abhyankar for (k = 0; k < countA; k++) { 85867877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 85967877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 860bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8619566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 8629566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj])); 86367877ebaSShri Abhyankar } 86467877ebaSShri Abhyankar val[jj++] = v1[idx++]; 86567877ebaSShri Abhyankar } 86667877ebaSShri Abhyankar } 86767877ebaSShri Abhyankar } 86867877ebaSShri Abhyankar 86967877ebaSShri Abhyankar idx = 0; 87067877ebaSShri Abhyankar /* B-part */ 87167877ebaSShri Abhyankar for (k = 0; k < countB; k++) { 87267877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 87367877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 874bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8759566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 8769566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 87767877ebaSShri Abhyankar } 878d985c460SShri Abhyankar val[jj++] = v2[idx++]; 87967877ebaSShri Abhyankar } 88067877ebaSShri Abhyankar } 88167877ebaSShri Abhyankar } 882d985c460SShri Abhyankar irow += bs; 88367877ebaSShri Abhyankar } 8843ba16761SJacob Faibussowitsch 
PetscFunctionReturn(PETSC_SUCCESS); 88567877ebaSShri Abhyankar } 88667877ebaSShri Abhyankar 887d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 888d71ae5a4SJacob Faibussowitsch { 88916ebf90aSShri Abhyankar const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 890a6053eceSJunchao Zhang PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 891a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 89216ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 89316ebf90aSShri Abhyankar PetscScalar *val; 894a3d589ffSStefano Zampini Mat Ad, Ao; 895a3d589ffSStefano Zampini Mat_SeqAIJ *aa; 896a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 89738548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 898b94d7dedSBarry Smith PetscBool hermitian, isset; 89938548759SBarry Smith #endif 90016ebf90aSShri Abhyankar 90116ebf90aSShri Abhyankar PetscFunctionBegin; 90238548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 903b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 904b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 90538548759SBarry Smith #endif 9069566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 9079566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 9089566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 909a3d589ffSStefano Zampini 910a3d589ffSStefano Zampini aa = (Mat_SeqAIJ *)(Ad)->data; 911a3d589ffSStefano Zampini bb = (Mat_SeqAIJ *)(Ao)->data; 91238548759SBarry Smith ai = aa->i; 91338548759SBarry Smith aj = aa->j; 91438548759SBarry Smith adiag = aa->diag; 91538548759SBarry Smith bi = bb->i; 91638548759SBarry Smith bj = bb->j; 9172205254eSKarl Rupp 91816ebf90aSShri Abhyankar rstart = A->rmap->rstart; 91916ebf90aSShri Abhyankar 920bccb9932SShri 
Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 921e0bace9bSHong Zhang nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 922e0bace9bSHong Zhang nzb = 0; /* num of upper triangular entries in mat->B */ 92316ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 924e0bace9bSHong Zhang nza += (ai[i + 1] - adiag[i]); 92516ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 92616ebf90aSShri Abhyankar bjj = bj + bi[i]; 927e0bace9bSHong Zhang for (j = 0; j < countB; j++) { 928e0bace9bSHong Zhang if (garray[bjj[j]] > rstart) nzb++; 929e0bace9bSHong Zhang } 930e0bace9bSHong Zhang } 93116ebf90aSShri Abhyankar 932e0bace9bSHong Zhang nz = nza + nzb; /* total nz of upper triangular part of mat */ 9339566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 9349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 935a6053eceSJunchao Zhang mumps->nnz = nz; 936a6053eceSJunchao Zhang mumps->irn = row; 937a6053eceSJunchao Zhang mumps->jcn = col; 938a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 93916ebf90aSShri Abhyankar } else { 940a6053eceSJunchao Zhang val = mumps->val; 94116ebf90aSShri Abhyankar } 94216ebf90aSShri Abhyankar 9439371c9d4SSatish Balay jj = 0; 9449371c9d4SSatish Balay irow = rstart; 94516ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 94616ebf90aSShri Abhyankar ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 94716ebf90aSShri Abhyankar v1 = av + adiag[i]; 94816ebf90aSShri Abhyankar countA = ai[i + 1] - adiag[i]; 94916ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 95016ebf90aSShri Abhyankar bjj = bj + bi[i]; 95116ebf90aSShri Abhyankar v2 = bv + bi[i]; 95216ebf90aSShri Abhyankar 95316ebf90aSShri Abhyankar /* A-part */ 95416ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 955bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 9569566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 9579566063dSJacob Faibussowitsch 
PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 95816ebf90aSShri Abhyankar } 95916ebf90aSShri Abhyankar val[jj++] = v1[j]; 96016ebf90aSShri Abhyankar } 96116ebf90aSShri Abhyankar 96216ebf90aSShri Abhyankar /* B-part */ 96316ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 96416ebf90aSShri Abhyankar if (garray[bjj[j]] > rstart) { 965bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 9669566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 9679566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 96816ebf90aSShri Abhyankar } 96916ebf90aSShri Abhyankar val[jj++] = v2[j]; 97016ebf90aSShri Abhyankar } 971397b6df1SKris Buschelman } 972397b6df1SKris Buschelman irow++; 973397b6df1SKris Buschelman } 9749566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 9759566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 9763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 977397b6df1SKris Buschelman } 978397b6df1SKris Buschelman 979d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_MUMPS(Mat A) 980d71ae5a4SJacob Faibussowitsch { 981a6053eceSJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 982b24902e0SBarry Smith 983397b6df1SKris Buschelman PetscFunctionBegin; 9849566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 9859566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 9869566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 9879566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->b_seq)); 9889566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 9899566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.perm_in)); 9909566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 9919566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->val_alloc)); 9929566063dSJacob Faibussowitsch 
PetscCall(PetscFree(mumps->info)); 993413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 994413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 9959566063dSJacob Faibussowitsch PetscCall(MatMumpsResetSchur_Private(mumps)); 996413bcc21SPierre Jolivet if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 997a5e57a09SHong Zhang mumps->id.job = JOB_END; 9983ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 99908401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1000413bcc21SPierre Jolivet if (mumps->mumps_comm != MPI_COMM_NULL) { 1001413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1002413bcc21SPierre Jolivet else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1003413bcc21SPierre Jolivet } 1004413bcc21SPierre Jolivet } 10053ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 100667602552SJunchao Zhang if (mumps->use_petsc_omp_support) { 10079566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 10089566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 10099566063dSJacob Faibussowitsch PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 101067602552SJunchao Zhang } 10113ab56b82SJunchao Zhang #endif 10129566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 10139566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 10149566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->recvcount)); 10159566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->reqs)); 10169566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->irhs_loc)); 10179566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1018bf0cc555SLisandro Dalcin 
101997969023SHong Zhang /* clear composed functions */ 10209566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 10219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 10229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 10239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 10249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 10259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 10269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 10279566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 10289566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 10299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 10309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 10315c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 10329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 10339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 10343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1035397b6df1SKris Buschelman } 1036397b6df1SKris Buschelman 103767602552SJunchao Zhang /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. 
<array> points to start of RHS on the local processor. */ 1038d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1039d71ae5a4SJacob Faibussowitsch { 104067602552SJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 104167602552SJunchao Zhang const PetscMPIInt ompsize = mumps->omp_comm_size; 104267602552SJunchao Zhang PetscInt i, m, M, rstart; 104367602552SJunchao Zhang 104467602552SJunchao Zhang PetscFunctionBegin; 10459566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &M, NULL)); 10469566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &m, NULL)); 104708401ef6SPierre Jolivet PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 104867602552SJunchao Zhang if (ompsize == 1) { 104967602552SJunchao Zhang if (!mumps->irhs_loc) { 105067602552SJunchao Zhang mumps->nloc_rhs = m; 10519566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 10529566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 105367602552SJunchao Zhang for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 105467602552SJunchao Zhang } 105567602552SJunchao Zhang mumps->id.rhs_loc = (MumpsScalar *)array; 105667602552SJunchao Zhang } else { 105767602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 105867602552SJunchao Zhang const PetscInt *ranges; 105967602552SJunchao Zhang PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 106067602552SJunchao Zhang MPI_Group petsc_group, omp_group; 106167602552SJunchao Zhang PetscScalar *recvbuf = NULL; 106267602552SJunchao Zhang 106367602552SJunchao Zhang if (mumps->is_omp_master) { 106467602552SJunchao Zhang /* Lazily initialize the omp stuff for distributed rhs */ 106567602552SJunchao Zhang if (!mumps->irhs_loc) { 10669566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 
10679566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 10689566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 10699566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 107067602552SJunchao Zhang for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 10719566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 107267602552SJunchao Zhang 107367602552SJunchao Zhang /* Populate mumps->irhs_loc[], rhs_nrow[] */ 107467602552SJunchao Zhang mumps->nloc_rhs = 0; 10759566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(A, &ranges)); 107667602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 107767602552SJunchao Zhang mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 107867602552SJunchao Zhang mumps->nloc_rhs += mumps->rhs_nrow[j]; 107967602552SJunchao Zhang } 10809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 108167602552SJunchao Zhang for (j = k = 0; j < ompsize; j++) { 108267602552SJunchao Zhang for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 108367602552SJunchao Zhang } 108467602552SJunchao Zhang 10859566063dSJacob Faibussowitsch PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 10869566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&petsc_group)); 10879566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&omp_group)); 108867602552SJunchao Zhang } 108967602552SJunchao Zhang 109067602552SJunchao Zhang /* Realloc buffers when current nrhs is bigger than what we have met */ 109167602552SJunchao Zhang if (nrhs > mumps->max_nrhs) { 10929566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 10939566063dSJacob Faibussowitsch 
PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 109467602552SJunchao Zhang mumps->max_nrhs = nrhs; 109567602552SJunchao Zhang } 109667602552SJunchao Zhang 109767602552SJunchao Zhang /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 10989566063dSJacob Faibussowitsch for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 109967602552SJunchao Zhang mumps->rhs_disps[0] = 0; 110067602552SJunchao Zhang for (j = 1; j < ompsize; j++) { 110167602552SJunchao Zhang mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 110208401ef6SPierre Jolivet PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 110367602552SJunchao Zhang } 110467602552SJunchao Zhang recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 110567602552SJunchao Zhang } 110667602552SJunchao Zhang 11079566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 11089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 110967602552SJunchao Zhang 111067602552SJunchao Zhang if (mumps->is_omp_master) { 111167602552SJunchao Zhang if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 111267602552SJunchao Zhang PetscScalar *dst, *dstbase = mumps->rhs_loc; 111367602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 111467602552SJunchao Zhang const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 111567602552SJunchao Zhang dst = dstbase; 111667602552SJunchao Zhang for (i = 0; i < nrhs; i++) { 11179566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 111867602552SJunchao Zhang src 
+= mumps->rhs_nrow[j]; 111967602552SJunchao Zhang dst += mumps->nloc_rhs; 112067602552SJunchao Zhang } 112167602552SJunchao Zhang dstbase += mumps->rhs_nrow[j]; 112267602552SJunchao Zhang } 112367602552SJunchao Zhang } 112467602552SJunchao Zhang mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 112567602552SJunchao Zhang } 112667602552SJunchao Zhang #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 112767602552SJunchao Zhang } 112867602552SJunchao Zhang mumps->id.nrhs = nrhs; 112967602552SJunchao Zhang mumps->id.nloc_rhs = mumps->nloc_rhs; 113067602552SJunchao Zhang mumps->id.lrhs_loc = mumps->nloc_rhs; 113167602552SJunchao Zhang mumps->id.irhs_loc = mumps->irhs_loc; 11323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 113367602552SJunchao Zhang } 113467602552SJunchao Zhang 1135d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1136d71ae5a4SJacob Faibussowitsch { 1137e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 113825aac85cSJunchao Zhang const PetscScalar *rarray = NULL; 1139d54de34fSKris Buschelman PetscScalar *array; 1140329ec9b3SHong Zhang IS is_iden, is_petsc; 1141329ec9b3SHong Zhang PetscInt i; 1142cc86f929SStefano Zampini PetscBool second_solve = PETSC_FALSE; 1143883f2eb9SBarry Smith static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1144397b6df1SKris Buschelman 1145397b6df1SKris Buschelman PetscFunctionBegin; 11469371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 11479371c9d4SSatish Balay "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 11489371c9d4SSatish Balay &cite1)); 11499371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. 
L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 11509371c9d4SSatish Balay "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 11519371c9d4SSatish Balay &cite2)); 11522aca8efcSHong Zhang 1153603e8f96SBarry Smith if (A->factorerrortype) { 11549566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 11559566063dSJacob Faibussowitsch PetscCall(VecSetInf(x)); 11563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11572aca8efcSHong Zhang } 11582aca8efcSHong Zhang 1159a5e57a09SHong Zhang mumps->id.nrhs = 1; 11602d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 116125aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 116267602552SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 11639566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(b, &rarray)); 11649566063dSJacob Faibussowitsch PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 116525aac85cSJunchao Zhang } else { 116641ffd417SStefano Zampini mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 11679566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 11689566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 116967602552SJunchao Zhang if (!mumps->myid) { 11709566063dSJacob Faibussowitsch PetscCall(VecGetArray(mumps->b_seq, &array)); 117167602552SJunchao Zhang mumps->id.rhs = (MumpsScalar *)array; 117267602552SJunchao Zhang } 117325aac85cSJunchao Zhang } 11743ab56b82SJunchao Zhang } else { /* petsc_size == 1 */ 117567602552SJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 11769566063dSJacob Faibussowitsch PetscCall(VecCopy(b, x)); 11779566063dSJacob Faibussowitsch 
PetscCall(VecGetArray(x, &array)); 1178940cd9d6SSatish Balay mumps->id.rhs = (MumpsScalar *)array; 1179397b6df1SKris Buschelman } 1180397b6df1SKris Buschelman 1181cc86f929SStefano Zampini /* 1182cc86f929SStefano Zampini handle condensation step of Schur complement (if any) 1183cc86f929SStefano Zampini We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1184cc86f929SStefano Zampini According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1185cc86f929SStefano Zampini Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1186cc86f929SStefano Zampini This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1187cc86f929SStefano Zampini */ 1188583f777eSStefano Zampini if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 118908401ef6SPierre Jolivet PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1190cc86f929SStefano Zampini second_solve = PETSC_TRUE; 11919566063dSJacob Faibussowitsch PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1192cc86f929SStefano Zampini } 1193397b6df1SKris Buschelman /* solve phase */ 1194a5e57a09SHong Zhang mumps->id.job = JOB_SOLVE; 11953ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 119608401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1197397b6df1SKris Buschelman 1198b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 11991baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1200b5fa320bSStefano Zampini 12012d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1202a5e57a09SHong Zhang if (mumps->scat_sol && mumps->ICNTL9_pre 
!= mumps->id.ICNTL(9)) { 1203a5e57a09SHong Zhang /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 12049566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1205397b6df1SKris Buschelman } 1206a5e57a09SHong Zhang if (!mumps->scat_sol) { /* create scatter scat_sol */ 1207a6053eceSJunchao Zhang PetscInt *isol2_loc = NULL; 12089566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 12099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1210a6053eceSJunchao Zhang for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 12119566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 12129566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 12139566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_iden)); 12149566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_petsc)); 1215a5e57a09SHong Zhang mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1216397b6df1SKris Buschelman } 1217a5e57a09SHong Zhang 12189566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 12199566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1220329ec9b3SHong Zhang } 1221353d7d71SJunchao Zhang 122267602552SJunchao Zhang if (mumps->petsc_size > 1) { 122325aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 12249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(b, &rarray)); 122525aac85cSJunchao Zhang } else if (!mumps->myid) { 12269566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mumps->b_seq, &array)); 122725aac85cSJunchao Zhang } 
12289566063dSJacob Faibussowitsch } else PetscCall(VecRestoreArray(x, &array)); 1229353d7d71SJunchao Zhang 12309566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3))); 12313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1232397b6df1SKris Buschelman } 1233397b6df1SKris Buschelman 1234d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1235d71ae5a4SJacob Faibussowitsch { 1236e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1237*338d3105SPierre Jolivet const PetscMUMPSInt value = mumps->id.ICNTL(9); 123851d5961aSHong Zhang 123951d5961aSHong Zhang PetscFunctionBegin; 1240a5e57a09SHong Zhang mumps->id.ICNTL(9) = 0; 12419566063dSJacob Faibussowitsch PetscCall(MatSolve_MUMPS(A, b, x)); 1242*338d3105SPierre Jolivet mumps->id.ICNTL(9) = value; 12433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 124451d5961aSHong Zhang } 124551d5961aSHong Zhang 1246d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1247d71ae5a4SJacob Faibussowitsch { 1248b8491c3eSStefano Zampini Mat Bt = NULL; 1249a6053eceSJunchao Zhang PetscBool denseX, denseB, flg, flgT; 1250e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1251334c5f61SHong Zhang PetscInt i, nrhs, M; 12521683a169SBarry Smith PetscScalar *array; 12531683a169SBarry Smith const PetscScalar *rbray; 1254a6053eceSJunchao Zhang PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1255a6053eceSJunchao Zhang PetscMUMPSInt *isol_loc, *isol_loc_save; 12561683a169SBarry Smith PetscScalar *bray, *sol_loc, *sol_loc_save; 1257be818407SHong Zhang IS is_to, is_from; 1258beae5ec0SHong Zhang PetscInt k, proc, j, m, myrstart; 1259be818407SHong Zhang const PetscInt *rstart; 126067602552SJunchao Zhang Vec v_mpi, msol_loc; 126167602552SJunchao Zhang VecScatter scat_sol; 126267602552SJunchao Zhang Vec b_seq; 126367602552SJunchao Zhang VecScatter scat_rhs; 1264be818407SHong Zhang PetscScalar *aa; 1265be818407SHong 
Zhang PetscInt spnr, *ia, *ja; 1266d56c302dSHong Zhang Mat_MPIAIJ *b = NULL; 1267bda8bf91SBarry Smith 1268e0b74bf9SHong Zhang PetscFunctionBegin; 12699566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 127028b400f6SJacob Faibussowitsch PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1271be818407SHong Zhang 12729566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1273a6053eceSJunchao Zhang if (denseB) { 127408401ef6SPierre Jolivet PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1275be818407SHong Zhang mumps->id.ICNTL(20) = 0; /* dense RHS */ 12760e6b8875SHong Zhang } else { /* sparse B */ 127708401ef6SPierre Jolivet PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1278013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1279da81f932SPierre Jolivet if (flgT) { /* input B is transpose of actual RHS matrix, 12800e6b8875SHong Zhang because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 12819566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(B, &Bt)); 1282013e2dc7SBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1283be818407SHong Zhang mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1284b8491c3eSStefano Zampini } 128587b22cf4SHong Zhang 12869566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &M, &nrhs)); 12879481e6e9SHong Zhang mumps->id.nrhs = nrhs; 12889481e6e9SHong Zhang mumps->id.lrhs = M; 12892b691707SHong Zhang mumps->id.rhs = NULL; 12909481e6e9SHong Zhang 12912d4298aeSJunchao Zhang if (mumps->petsc_size == 1) { 1292b8491c3eSStefano Zampini PetscScalar *aa; 1293b8491c3eSStefano Zampini PetscInt spnr, *ia, *ja; 1294e94cce23SStefano Zampini PetscBool second_solve = PETSC_FALSE; 1295b8491c3eSStefano Zampini 12969566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 1297b8491c3eSStefano Zampini mumps->id.rhs = (MumpsScalar *)array; 12982b691707SHong Zhang 1299a6053eceSJunchao Zhang if (denseB) { 13002b691707SHong Zhang /* copy B to X */ 13019566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &rbray)); 13029566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 13039566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 13042b691707SHong Zhang } else { /* sparse B */ 13059566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Bt, &aa)); 13069566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 130728b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 13089566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1309b8491c3eSStefano Zampini mumps->id.rhs_sparse = (MumpsScalar *)aa; 1310b8491c3eSStefano Zampini } 
1311e94cce23SStefano Zampini /* handle condensation step of Schur complement (if any) */ 1312583f777eSStefano Zampini if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1313e94cce23SStefano Zampini second_solve = PETSC_TRUE; 13149566063dSJacob Faibussowitsch PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1315e94cce23SStefano Zampini } 13162cd7d884SHong Zhang /* solve phase */ 13172cd7d884SHong Zhang mumps->id.job = JOB_SOLVE; 13183ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 131908401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1320b5fa320bSStefano Zampini 1321b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 13221baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1323a6053eceSJunchao Zhang if (!denseB) { /* sparse B */ 13249566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 13259566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 132628b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1327b8491c3eSStefano Zampini } 13289566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 13293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1330be818407SHong Zhang } 1331801fbe65SHong Zhang 13322ef1f0ffSBarry Smith /* parallel case: MUMPS requires rhs B to be centralized on the host! 
*/ 1333aed4548fSBarry Smith PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1334241dbb5eSStefano Zampini 1335beae5ec0SHong Zhang /* create msol_loc to hold mumps local solution */ 13361683a169SBarry Smith isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 13371683a169SBarry Smith sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1338801fbe65SHong Zhang 1339a1dfcbd9SJunchao Zhang lsol_loc = mumps->id.lsol_loc; 134071aed81dSHong Zhang nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 13419566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1342940cd9d6SSatish Balay mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1343801fbe65SHong Zhang mumps->id.isol_loc = isol_loc; 1344801fbe65SHong Zhang 13459566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 13462cd7d884SHong Zhang 134767602552SJunchao Zhang if (denseB) { 134825aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 134967602552SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 13509566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &rbray)); 13519566063dSJacob Faibussowitsch PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 13529566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 13539566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 13549566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 135525aac85cSJunchao Zhang } else { 135625aac85cSJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 135780577c12SJunchao Zhang /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 135880577c12SJunchao Zhang very inefficient communication. 
An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 135980577c12SJunchao Zhang 0, re-arrange B into desired order, which is a local operation. 136080577c12SJunchao Zhang */ 136180577c12SJunchao Zhang 136267602552SJunchao Zhang /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1363be818407SHong Zhang /* wrap dense rhs matrix B into a vector v_mpi */ 13649566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 13659566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(B, &bray)); 13669566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 13679566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(B, &bray)); 13682b691707SHong Zhang 1369be818407SHong Zhang /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1370801fbe65SHong Zhang if (!mumps->myid) { 1371beae5ec0SHong Zhang PetscInt *idx; 1372beae5ec0SHong Zhang /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 13739566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrhs * M, &idx)); 13749566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(B, &rstart)); 1375be818407SHong Zhang k = 0; 13762d4298aeSJunchao Zhang for (proc = 0; proc < mumps->petsc_size; proc++) { 1377be818407SHong Zhang for (j = 0; j < nrhs; j++) { 1378beae5ec0SHong Zhang for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1379be818407SHong Zhang } 1380be818407SHong Zhang } 1381be818407SHong Zhang 13829566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 13839566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 13849566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1385801fbe65SHong Zhang } else { 13869566063dSJacob Faibussowitsch 
PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 13879566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 13889566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1389801fbe65SHong Zhang } 13909566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 13919566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 13929566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 13939566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 13949566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1395801fbe65SHong Zhang 1396801fbe65SHong Zhang if (!mumps->myid) { /* define rhs on the host */ 13979566063dSJacob Faibussowitsch PetscCall(VecGetArray(b_seq, &bray)); 1398940cd9d6SSatish Balay mumps->id.rhs = (MumpsScalar *)bray; 13999566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b_seq, &bray)); 1400801fbe65SHong Zhang } 140125aac85cSJunchao Zhang } 14022b691707SHong Zhang } else { /* sparse B */ 14032b691707SHong Zhang b = (Mat_MPIAIJ *)Bt->data; 14042b691707SHong Zhang 1405be818407SHong Zhang /* wrap dense X into a vector v_mpi */ 14069566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(X, &m, NULL)); 14079566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &bray)); 14089566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 14099566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &bray)); 14102b691707SHong Zhang 14112b691707SHong Zhang if (!mumps->myid) { 14129566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(b->A, &aa)); 14139566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 141428b400f6SJacob 
Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 14159566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 14162b691707SHong Zhang mumps->id.rhs_sparse = (MumpsScalar *)aa; 14172b691707SHong Zhang } else { 14182b691707SHong Zhang mumps->id.irhs_ptr = NULL; 14192b691707SHong Zhang mumps->id.irhs_sparse = NULL; 14202b691707SHong Zhang mumps->id.nz_rhs = 0; 14212b691707SHong Zhang mumps->id.rhs_sparse = NULL; 14222b691707SHong Zhang } 14232b691707SHong Zhang } 14242b691707SHong Zhang 1425801fbe65SHong Zhang /* solve phase */ 1426801fbe65SHong Zhang mumps->id.job = JOB_SOLVE; 14273ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 142808401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1429801fbe65SHong Zhang 1430334c5f61SHong Zhang /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 14319566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 14329566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(v_mpi, array)); 1433801fbe65SHong Zhang 1434334c5f61SHong Zhang /* create scatter scat_sol */ 14359566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(X, &rstart)); 1436beae5ec0SHong Zhang /* iidx: index for scatter mumps solution to petsc X */ 1437beae5ec0SHong Zhang 14389566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 14399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1440beae5ec0SHong Zhang for (i = 0; i < lsol_loc; i++) { 1441beae5ec0SHong Zhang isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1442beae5ec0SHong Zhang 14432d4298aeSJunchao Zhang for (proc = 0; proc < mumps->petsc_size; proc++) { 1444beae5ec0SHong Zhang if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1445beae5ec0SHong Zhang myrstart = rstart[proc]; 1446beae5ec0SHong Zhang k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1447beae5ec0SHong Zhang iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1448beae5ec0SHong Zhang m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1449beae5ec0SHong Zhang break; 1450be818407SHong Zhang } 1451be818407SHong Zhang } 1452be818407SHong Zhang 1453beae5ec0SHong Zhang for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1454801fbe65SHong Zhang } 14559566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 14569566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 14579566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 14589566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 14599566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 14609566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 14619566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 146271aed81dSHong Zhang 146371aed81dSHong Zhang /* free spaces */ 14641683a169SBarry Smith mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 146571aed81dSHong Zhang mumps->id.isol_loc = isol_loc_save; 146671aed81dSHong Zhang 14679566063dSJacob Faibussowitsch PetscCall(PetscFree2(sol_loc, isol_loc)); 14689566063dSJacob Faibussowitsch PetscCall(PetscFree(idxx)); 14699566063dSJacob Faibussowitsch PetscCall(VecDestroy(&msol_loc)); 14709566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v_mpi)); 
1471a6053eceSJunchao Zhang if (!denseB) { 14722b691707SHong Zhang if (!mumps->myid) { 1473d56c302dSHong Zhang b = (Mat_MPIAIJ *)Bt->data; 14749566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 14759566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 147628b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 14772b691707SHong Zhang } 14782b691707SHong Zhang } else { 147925aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { 14809566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b_seq)); 14819566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_rhs)); 148225aac85cSJunchao Zhang } 14832b691707SHong Zhang } 14849566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_sol)); 14859566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3))); 14863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1487e0b74bf9SHong Zhang } 1488e0b74bf9SHong Zhang 1489d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1490d71ae5a4SJacob Faibussowitsch { 1491b18964edSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1492*338d3105SPierre Jolivet const PetscMUMPSInt value = mumps->id.ICNTL(9); 1493b18964edSHong Zhang 1494b18964edSHong Zhang PetscFunctionBegin; 1495b18964edSHong Zhang mumps->id.ICNTL(9) = 0; 1496b18964edSHong Zhang PetscCall(MatMatSolve_MUMPS(A, B, X)); 1497*338d3105SPierre Jolivet mumps->id.ICNTL(9) = value; 14983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1499b18964edSHong Zhang } 1500b18964edSHong Zhang 1501d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1502d71ae5a4SJacob Faibussowitsch { 1503eb3ef3b2SHong Zhang PetscBool flg; 1504eb3ef3b2SHong Zhang Mat B; 1505eb3ef3b2SHong Zhang 1506eb3ef3b2SHong Zhang PetscFunctionBegin; 
15079566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 150828b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1509eb3ef3b2SHong Zhang 1510eb3ef3b2SHong Zhang /* Create B=Bt^T that uses Bt's data structure */ 15119566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(Bt, &B)); 1512eb3ef3b2SHong Zhang 15139566063dSJacob Faibussowitsch PetscCall(MatMatSolve_MUMPS(A, B, X)); 15149566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 15153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1516eb3ef3b2SHong Zhang } 1517eb3ef3b2SHong Zhang 1518ace3df97SHong Zhang #if !defined(PETSC_USE_COMPLEX) 1519a58c3f20SHong Zhang /* 1520a58c3f20SHong Zhang input: 1521a58c3f20SHong Zhang F: numeric factor 1522a58c3f20SHong Zhang output: 1523a58c3f20SHong Zhang nneg: total number of negative pivots 152419d49a3bSHong Zhang nzero: total number of zero pivots 152519d49a3bSHong Zhang npos: (global dimension of F) - nneg - nzero 1526a58c3f20SHong Zhang */ 1527d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1528d71ae5a4SJacob Faibussowitsch { 1529e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1530c1490034SHong Zhang PetscMPIInt size; 1531a58c3f20SHong Zhang 1532a58c3f20SHong Zhang PetscFunctionBegin; 15339566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1534bcb30aebSHong Zhang /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1535aed4548fSBarry Smith PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1536ed85ac9fSHong Zhang 1537710ac8efSHong Zhang if (nneg) *nneg = mumps->id.INFOG(12); 1538ed85ac9fSHong Zhang if (nzero || npos) { 153908401ef6SPierre Jolivet PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1540710ac8efSHong Zhang if (nzero) *nzero = mumps->id.INFOG(28); 1541710ac8efSHong Zhang if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1542a58c3f20SHong Zhang } 15433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1544a58c3f20SHong Zhang } 154519d49a3bSHong Zhang #endif 1546a58c3f20SHong Zhang 1547d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1548d71ae5a4SJacob Faibussowitsch { 1549a6053eceSJunchao Zhang PetscInt i, nreqs; 1550a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; 1551a6053eceSJunchao Zhang PetscMPIInt count; 1552a6053eceSJunchao Zhang PetscInt64 totnnz, remain; 1553a6053eceSJunchao Zhang const PetscInt osize = mumps->omp_comm_size; 1554a6053eceSJunchao Zhang PetscScalar *val; 15553ab56b82SJunchao Zhang 15563ab56b82SJunchao Zhang PetscFunctionBegin; 1557a6053eceSJunchao Zhang if (osize > 1) { 15583ab56b82SJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) { 15593ab56b82SJunchao Zhang /* master first gathers counts of nonzeros to receive */ 15609566063dSJacob Faibussowitsch if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 15619566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 15623ab56b82SJunchao Zhang 1563a6053eceSJunchao Zhang /* Then each computes number of send/recvs */ 15643ab56b82SJunchao Zhang if (mumps->is_omp_master) { 1565a6053eceSJunchao Zhang /* Start from 1 since self communication is not done in MPI */ 
1566a6053eceSJunchao Zhang nreqs = 0; 1567a6053eceSJunchao Zhang for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1568a6053eceSJunchao Zhang } else { 1569a6053eceSJunchao Zhang nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 15703ab56b82SJunchao Zhang } 157135cb6cd3SPierre Jolivet PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 15723ab56b82SJunchao Zhang 1573a6053eceSJunchao Zhang /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1574a6053eceSJunchao Zhang MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1575a6053eceSJunchao Zhang might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1576a6053eceSJunchao Zhang is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1577a6053eceSJunchao Zhang */ 1578a6053eceSJunchao Zhang nreqs = 0; /* counter for actual send/recvs */ 15793ab56b82SJunchao Zhang if (mumps->is_omp_master) { 1580a6053eceSJunchao Zhang for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 15819566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 15829566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(totnnz, &val)); 1583a6053eceSJunchao Zhang 1584a6053eceSJunchao Zhang /* Self communication */ 15859566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 15869566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 15879566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1588a6053eceSJunchao Zhang 1589a6053eceSJunchao Zhang /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 15909566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->irn, 
mumps->jcn)); 15919566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->val_alloc)); 1592a6053eceSJunchao Zhang mumps->nnz = totnnz; 15933ab56b82SJunchao Zhang mumps->irn = irn; 15943ab56b82SJunchao Zhang mumps->jcn = jcn; 1595a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 1596a6053eceSJunchao Zhang 1597a6053eceSJunchao Zhang irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1598a6053eceSJunchao Zhang jcn += mumps->recvcount[0]; 1599a6053eceSJunchao Zhang val += mumps->recvcount[0]; 1600a6053eceSJunchao Zhang 1601a6053eceSJunchao Zhang /* Remote communication */ 1602a6053eceSJunchao Zhang for (i = 1; i < osize; i++) { 1603a6053eceSJunchao Zhang count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1604a6053eceSJunchao Zhang remain = mumps->recvcount[i] - count; 1605a6053eceSJunchao Zhang while (count > 0) { 16069566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 16079566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 16089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1609a6053eceSJunchao Zhang irn += count; 1610a6053eceSJunchao Zhang jcn += count; 1611a6053eceSJunchao Zhang val += count; 1612a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1613a6053eceSJunchao Zhang remain -= count; 1614a6053eceSJunchao Zhang } 16153ab56b82SJunchao Zhang } 16163ab56b82SJunchao Zhang } else { 1617a6053eceSJunchao Zhang irn = mumps->irn; 1618a6053eceSJunchao Zhang jcn = mumps->jcn; 1619a6053eceSJunchao Zhang val = mumps->val; 1620a6053eceSJunchao Zhang count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1621a6053eceSJunchao Zhang remain = mumps->nnz - count; 1622a6053eceSJunchao Zhang while (count > 0) { 16239566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(irn, 
count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 16249566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 16259566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1626a6053eceSJunchao Zhang irn += count; 1627a6053eceSJunchao Zhang jcn += count; 1628a6053eceSJunchao Zhang val += count; 1629a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1630a6053eceSJunchao Zhang remain -= count; 16313ab56b82SJunchao Zhang } 16323ab56b82SJunchao Zhang } 1633a6053eceSJunchao Zhang } else { 1634a6053eceSJunchao Zhang nreqs = 0; 1635a6053eceSJunchao Zhang if (mumps->is_omp_master) { 1636a6053eceSJunchao Zhang val = mumps->val + mumps->recvcount[0]; 1637a6053eceSJunchao Zhang for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1638a6053eceSJunchao Zhang count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1639a6053eceSJunchao Zhang remain = mumps->recvcount[i] - count; 1640a6053eceSJunchao Zhang while (count > 0) { 16419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1642a6053eceSJunchao Zhang val += count; 1643a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1644a6053eceSJunchao Zhang remain -= count; 1645a6053eceSJunchao Zhang } 1646a6053eceSJunchao Zhang } 1647a6053eceSJunchao Zhang } else { 1648a6053eceSJunchao Zhang val = mumps->val; 1649a6053eceSJunchao Zhang count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1650a6053eceSJunchao Zhang remain = mumps->nnz - count; 1651a6053eceSJunchao Zhang while (count > 0) { 16529566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1653a6053eceSJunchao Zhang val += count; 1654a6053eceSJunchao Zhang 
count = PetscMin(remain, PETSC_MPI_INT_MAX); 1655a6053eceSJunchao Zhang remain -= count; 1656a6053eceSJunchao Zhang } 1657a6053eceSJunchao Zhang } 1658a6053eceSJunchao Zhang } 16599566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1660a6053eceSJunchao Zhang mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1661a6053eceSJunchao Zhang } 16623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16633ab56b82SJunchao Zhang } 16643ab56b82SJunchao Zhang 1665d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info) 1666d71ae5a4SJacob Faibussowitsch { 1667e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1668ace3abfcSBarry Smith PetscBool isMPIAIJ; 1669397b6df1SKris Buschelman 1670397b6df1SKris Buschelman PetscFunctionBegin; 1671dbf6bb8dSprj- if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 167248a46eb9SPierre Jolivet if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 16739566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 16743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16752aca8efcSHong Zhang } 16766baea169SHong Zhang 16779566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 16789566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1679397b6df1SKris Buschelman 1680397b6df1SKris Buschelman /* numerical factorization phase */ 1681a5e57a09SHong Zhang mumps->id.job = JOB_FACTNUMERIC; 16824e34a73bSHong Zhang if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1683ad540459SPierre Jolivet if (!mumps->myid) mumps->id.a = 
(MumpsScalar *)mumps->val; 1684397b6df1SKris Buschelman } else { 1685940cd9d6SSatish Balay mumps->id.a_loc = (MumpsScalar *)mumps->val; 1686397b6df1SKris Buschelman } 16873ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 1688a5e57a09SHong Zhang if (mumps->id.INFOG(1) < 0) { 16897a46b595SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 1690c0d63f2fSHong Zhang if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 16919566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1692603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1693c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -13) { 16949566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1695603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1696c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 16979566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1698603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 16992aca8efcSHong Zhang } else { 17009566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1701603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 1702151787a6SHong Zhang } 17032aca8efcSHong Zhang } 1704aed4548fSBarry Smith PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " 
mumps->id.ICNTL(16):=%d", mumps->id.INFOG(16)); 1705397b6df1SKris Buschelman 1706b3cb21ddSStefano Zampini F->assembled = PETSC_TRUE; 1707d47f36abSHong Zhang 1708b3cb21ddSStefano Zampini if (F->schur) { /* reset Schur status to unfactored */ 17093cb7dd0eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 1710c70f7ee4SJunchao Zhang F->schur->offloadmask = PETSC_OFFLOAD_CPU; 17113cb7dd0eSStefano Zampini #endif 1712b3cb21ddSStefano Zampini if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1713b3cb21ddSStefano Zampini mumps->id.ICNTL(19) = 2; 17149566063dSJacob Faibussowitsch PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1715b3cb21ddSStefano Zampini } 17169566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1717b3cb21ddSStefano Zampini } 171867877ebaSShri Abhyankar 1719066565c5SStefano Zampini /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1720066565c5SStefano Zampini if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1721066565c5SStefano Zampini 17223ab56b82SJunchao Zhang if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 17232d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 172467877ebaSShri Abhyankar PetscInt lsol_loc; 172567877ebaSShri Abhyankar PetscScalar *sol_loc; 17262205254eSKarl Rupp 17279566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 1728c2093ab7SHong Zhang 1729c2093ab7SHong Zhang /* distributed solution; Create x_seq=sol_loc for repeated use */ 1730c2093ab7SHong Zhang if (mumps->x_seq) { 17319566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 17329566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 17339566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 1734c2093ab7SHong Zhang } 1735a5e57a09SHong Zhang lsol_loc = mumps->id.INFO(23); /* length of 
sol_loc */ 17369566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1737a5e57a09SHong Zhang mumps->id.lsol_loc = lsol_loc; 1738940cd9d6SSatish Balay mumps->id.sol_loc = (MumpsScalar *)sol_loc; 17399566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 174067877ebaSShri Abhyankar } 17419566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 17423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1743397b6df1SKris Buschelman } 1744397b6df1SKris Buschelman 17459a2535b5SHong Zhang /* Sets MUMPS options from the options database */ 1746d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1747d71ae5a4SJacob Faibussowitsch { 1748e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1749413bcc21SPierre Jolivet PetscMUMPSInt icntl = 0, size, *listvar_schur; 175045e3843bSPierre Jolivet PetscInt info[80], i, ninfo = 80, rbs, cbs; 1751413bcc21SPierre Jolivet PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1752413bcc21SPierre Jolivet MumpsScalar *arr; 1753dcd589f8SShri Abhyankar 1754dcd589f8SShri Abhyankar PetscFunctionBegin; 175526cc229bSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1756413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1757413bcc21SPierre Jolivet PetscInt nthreads = 0; 1758413bcc21SPierre Jolivet PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1759413bcc21SPierre Jolivet PetscInt nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 1760413bcc21SPierre Jolivet 1761413bcc21SPierre Jolivet mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1762413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1763413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1764413bcc21SPierre Jolivet 1765413bcc21SPierre Jolivet PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1766413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1767413bcc21SPierre Jolivet /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1768413bcc21SPierre Jolivet PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1769413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) { 17709371c9d4SSatish Balay PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 17719371c9d4SSatish Balay ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1772413bcc21SPierre Jolivet PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 1773413bcc21SPierre Jolivet #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1774413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1775413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1776413bcc21SPierre Jolivet #endif 1777413bcc21SPierre Jolivet } else { 1778413bcc21SPierre Jolivet mumps->omp_comm = PETSC_COMM_SELF; 1779413bcc21SPierre Jolivet mumps->mumps_comm = mumps->petsc_comm; 1780413bcc21SPierre Jolivet mumps->is_omp_master = PETSC_TRUE; 1781413bcc21SPierre Jolivet } 1782413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1783413bcc21SPierre Jolivet mumps->reqs = NULL; 1784413bcc21SPierre Jolivet mumps->tag = 0; 1785413bcc21SPierre Jolivet 1786413bcc21SPierre Jolivet if (mumps->mumps_comm != MPI_COMM_NULL) { 1787413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1788413bcc21SPierre Jolivet /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 1789413bcc21SPierre Jolivet MPI_Comm comm; 1790413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 1791413bcc21SPierre Jolivet mumps->mumps_comm = comm; 1792413bcc21SPierre Jolivet } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 1793413bcc21SPierre Jolivet } 1794413bcc21SPierre Jolivet 1795413bcc21SPierre Jolivet mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1796413bcc21SPierre Jolivet mumps->id.job = JOB_INIT; 1797413bcc21SPierre Jolivet mumps->id.par = 1; /* host participates factorizaton and solve */ 1798413bcc21SPierre Jolivet mumps->id.sym = mumps->sym; 1799413bcc21SPierre Jolivet 1800413bcc21SPierre Jolivet size = mumps->id.size_schur; 1801413bcc21SPierre Jolivet arr = mumps->id.schur; 1802413bcc21SPierre Jolivet listvar_schur = mumps->id.listvar_schur; 1803413bcc21SPierre Jolivet PetscMUMPS_c(mumps); 1804413bcc21SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1805413bcc21SPierre Jolivet /* restore cached ICNTL and CNTL values */ 1806413bcc21SPierre Jolivet for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 1807413bcc21SPierre Jolivet for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 1808413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 1809413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 1810413bcc21SPierre Jolivet 1811413bcc21SPierre Jolivet if (schur) { 1812413bcc21SPierre Jolivet mumps->id.size_schur = size; 1813413bcc21SPierre Jolivet mumps->id.schur_lld = size; 1814413bcc21SPierre Jolivet mumps->id.schur = arr; 1815413bcc21SPierre Jolivet mumps->id.listvar_schur = listvar_schur; 1816413bcc21SPierre Jolivet if (mumps->petsc_size > 1) { 1817413bcc21SPierre Jolivet PetscBool gs; /* gs is false if any rank 
other than root has non-empty IS */ 1818413bcc21SPierre Jolivet 1819413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1820413bcc21SPierre Jolivet gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1821712fec58SPierre Jolivet PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 1822413bcc21SPierre Jolivet PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 1823413bcc21SPierre Jolivet } else { 1824413bcc21SPierre Jolivet if (F->factortype == MAT_FACTOR_LU) { 1825413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1826413bcc21SPierre Jolivet } else { 1827413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1828413bcc21SPierre Jolivet } 1829413bcc21SPierre Jolivet } 1830413bcc21SPierre Jolivet mumps->id.ICNTL(26) = -1; 1831413bcc21SPierre Jolivet } 1832413bcc21SPierre Jolivet 1833413bcc21SPierre Jolivet /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1834413bcc21SPierre Jolivet For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
1835413bcc21SPierre Jolivet */ 1836413bcc21SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 1837413bcc21SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 1838413bcc21SPierre Jolivet 1839413bcc21SPierre Jolivet mumps->scat_rhs = NULL; 1840413bcc21SPierre Jolivet mumps->scat_sol = NULL; 1841413bcc21SPierre Jolivet 1842413bcc21SPierre Jolivet /* set PETSc-MUMPS default options - override MUMPS default */ 1843413bcc21SPierre Jolivet mumps->id.ICNTL(3) = 0; 1844413bcc21SPierre Jolivet mumps->id.ICNTL(4) = 0; 1845413bcc21SPierre Jolivet if (mumps->petsc_size == 1) { 1846413bcc21SPierre Jolivet mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1847413bcc21SPierre Jolivet mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1848413bcc21SPierre Jolivet } else { 1849413bcc21SPierre Jolivet mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1850413bcc21SPierre Jolivet mumps->id.ICNTL(21) = 1; /* distributed solution */ 1851413bcc21SPierre Jolivet } 1852413bcc21SPierre Jolivet } 18539566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 18549a2535b5SHong Zhang if (flg) mumps->id.ICNTL(1) = icntl; 18559566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 18569a2535b5SHong Zhang if (flg) mumps->id.ICNTL(2) = icntl; 18579566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 18589a2535b5SHong Zhang if (flg) mumps->id.ICNTL(3) = icntl; 1859dcd589f8SShri Abhyankar 18609566063dSJacob Faibussowitsch 
PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 18619a2535b5SHong Zhang if (flg) mumps->id.ICNTL(4) = icntl; 18629a2535b5SHong Zhang if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 18639a2535b5SHong Zhang 18649566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 18659a2535b5SHong Zhang if (flg) mumps->id.ICNTL(6) = icntl; 18669a2535b5SHong Zhang 18679566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 1868dcd589f8SShri Abhyankar if (flg) { 1869aed4548fSBarry Smith PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1870b53c1a7fSBarry Smith mumps->id.ICNTL(7) = icntl; 1871dcd589f8SShri Abhyankar } 1872e0b74bf9SHong Zhang 18739566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 18749566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 18759566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 18769566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an 
error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 18779566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 18789566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 18799566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 188045e3843bSPierre Jolivet PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 188145e3843bSPierre Jolivet if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 188245e3843bSPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 188345e3843bSPierre Jolivet if (flg) { 188445e3843bSPierre Jolivet PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 188545e3843bSPierre Jolivet PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 188645e3843bSPierre Jolivet } 18879566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 188859ac8732SStefano Zampini if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 18899566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F->schur)); 18909566063dSJacob Faibussowitsch 
PetscCall(MatMumpsResetSchur_Private(mumps)); 189159ac8732SStefano Zampini } 189225aac85cSJunchao Zhang 189343f3b051SJunchao Zhang /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 189443f3b051SJunchao Zhang and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 189525aac85cSJunchao Zhang and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 189643f3b051SJunchao Zhang This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 189743f3b051SJunchao Zhang see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 189843f3b051SJunchao Zhang In short, we could not use distributed RHS with MPICH until v4.0b1. 189925aac85cSJunchao Zhang */ 190043f3b051SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 190125aac85cSJunchao Zhang mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 190243f3b051SJunchao Zhang #else 190343f3b051SJunchao Zhang mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 190425aac85cSJunchao Zhang #endif 19059566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 1906aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10", (int)mumps->ICNTL20); 190725aac85cSJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 1908aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 190925aac85cSJunchao Zhang #endif 19109566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 19119a2535b5SHong Zhang 19129566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 19139566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 19149566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 19159371c9d4SSatish Balay if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 1916d7ebd59bSHong Zhang 19179566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 19189566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 19199566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple 
right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 19209566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 19219566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 19229566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 19239566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 19249566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 19259566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 19269566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 19279566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 19289566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated 
compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 1929dcd589f8SShri Abhyankar 19309566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 19319566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 19329566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 19339566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 19349566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 19359566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 1936e5bb22a1SHong Zhang 19379566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 1938b34f08ffSHong Zhang 19399566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 1940b34f08ffSHong Zhang if (ninfo) { 194108401ef6SPierre Jolivet PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 19429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ninfo, &mumps->info)); 1943b34f08ffSHong Zhang mumps->ninfo = ninfo; 1944b34f08ffSHong Zhang for (i = 0; i < ninfo; i++) { 1945aed4548fSBarry Smith 
PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 1946f7d195e4SLawrence Mitchell mumps->info[i] = info[i]; 1947b34f08ffSHong Zhang } 1948b34f08ffSHong Zhang } 1949d0609cedSBarry Smith PetscOptionsEnd(); 19503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1951dcd589f8SShri Abhyankar } 1952dcd589f8SShri Abhyankar 1953d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 1954d71ae5a4SJacob Faibussowitsch { 19555cd7cf9dSHong Zhang PetscFunctionBegin; 19565cd7cf9dSHong Zhang if (mumps->id.INFOG(1) < 0) { 19577a46b595SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1)); 19585cd7cf9dSHong Zhang if (mumps->id.INFOG(1) == -6) { 19599566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1960603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 19615cd7cf9dSHong Zhang } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 19629566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1963603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1964dbf6bb8dSprj- } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 19659566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "Empty matrix\n")); 19665cd7cf9dSHong Zhang } else { 19679566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1968603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 19695cd7cf9dSHong Zhang } 19705cd7cf9dSHong Zhang } 19713ba16761SJacob Faibussowitsch 
PetscFunctionReturn(PETSC_SUCCESS); 19725cd7cf9dSHong Zhang } 19735cd7cf9dSHong Zhang 1974d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 1975d71ae5a4SJacob Faibussowitsch { 1976e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 197767877ebaSShri Abhyankar Vec b; 197867877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 1979397b6df1SKris Buschelman 1980397b6df1SKris Buschelman PetscFunctionBegin; 1981d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1982d47f36abSHong Zhang /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 19833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1984d47f36abSHong Zhang } 1985dcd589f8SShri Abhyankar 19869a2535b5SHong Zhang /* Set MUMPS options from the options database */ 198726cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 1988dcd589f8SShri Abhyankar 19899566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 19909566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 1991dcd589f8SShri Abhyankar 199267877ebaSShri Abhyankar /* analysis phase */ 1993a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 1994a5e57a09SHong Zhang mumps->id.n = M; 1995a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 199667877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 1997a5e57a09SHong Zhang if (!mumps->myid) { 1998a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 1999a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2000a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2001a6053eceSJunchao Zhang if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 20024ac6704cSBarry Smith if (r) { 20034ac6704cSBarry Smith mumps->id.ICNTL(7) = 1; 2004a5e57a09SHong Zhang if (!mumps->myid) { 2005e0b74bf9SHong Zhang const PetscInt *idx; 2006a6053eceSJunchao Zhang PetscInt i; 
20072205254eSKarl Rupp 20089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 20099566063dSJacob Faibussowitsch PetscCall(ISGetIndices(r, &idx)); 20109566063dSJacob Faibussowitsch for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 20119566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(r, &idx)); 2012e0b74bf9SHong Zhang } 2013e0b74bf9SHong Zhang } 201467877ebaSShri Abhyankar } 201567877ebaSShri Abhyankar break; 201667877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2017a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2018a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2019a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2020a6053eceSJunchao Zhang if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 202125aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 20229566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 20239566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 20249566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 202525aac85cSJunchao Zhang } 202667877ebaSShri Abhyankar break; 202767877ebaSShri Abhyankar } 20283ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 20299566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 203067877ebaSShri Abhyankar 2031719d5645SBarry Smith F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2032dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 203351d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 20344e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 2035eb3ef3b2SHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2036b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2037d47f36abSHong Zhang 
2038d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 20393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2040b24902e0SBarry Smith } 2041b24902e0SBarry Smith 2042450b117fSShri Abhyankar /* Note the Petsc r and c permutations are ignored */ 2043d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2044d71ae5a4SJacob Faibussowitsch { 2045e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 204667877ebaSShri Abhyankar Vec b; 204767877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2048450b117fSShri Abhyankar 2049450b117fSShri Abhyankar PetscFunctionBegin; 2050d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2051*338d3105SPierre Jolivet /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 20523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2053d47f36abSHong Zhang } 2054dcd589f8SShri Abhyankar 20559a2535b5SHong Zhang /* Set MUMPS options from the options database */ 205626cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2057dcd589f8SShri Abhyankar 20589566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 20599566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 206067877ebaSShri Abhyankar 206167877ebaSShri Abhyankar /* analysis phase */ 2062a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2063a5e57a09SHong Zhang mumps->id.n = M; 2064a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 206567877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2066a5e57a09SHong Zhang if (!mumps->myid) { 2067a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2068a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2069a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2070ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 207167877ebaSShri Abhyankar } 
207267877ebaSShri Abhyankar break; 207367877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2074a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2075a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2076a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2077ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 207825aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 20799566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 20809566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 20819566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 208225aac85cSJunchao Zhang } 208367877ebaSShri Abhyankar break; 208467877ebaSShri Abhyankar } 20853ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 20869566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 208767877ebaSShri Abhyankar 2088450b117fSShri Abhyankar F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2089dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 209051d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2091b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2092d47f36abSHong Zhang 2093d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 20943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2095450b117fSShri Abhyankar } 2096b24902e0SBarry Smith 2097141f4205SHong Zhang /* Note the Petsc r permutation and factor info are ignored */ 2098d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2099d71ae5a4SJacob Faibussowitsch { 2100e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 210167877ebaSShri Abhyankar Vec b; 210267877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2103397b6df1SKris Buschelman 
2104397b6df1SKris Buschelman PetscFunctionBegin; 2105d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2106*338d3105SPierre Jolivet /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 21073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2108d47f36abSHong Zhang } 2109dcd589f8SShri Abhyankar 21109a2535b5SHong Zhang /* Set MUMPS options from the options database */ 211126cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2112dcd589f8SShri Abhyankar 21139566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 21149566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2115dcd589f8SShri Abhyankar 211667877ebaSShri Abhyankar /* analysis phase */ 2117a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2118a5e57a09SHong Zhang mumps->id.n = M; 2119a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 212067877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2121a5e57a09SHong Zhang if (!mumps->myid) { 2122a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2123a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2124a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2125ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 212667877ebaSShri Abhyankar } 212767877ebaSShri Abhyankar break; 212867877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2129a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2130a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2131a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2132ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 213325aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 21349566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 21359566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 21369566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 213725aac85cSJunchao Zhang } 213867877ebaSShri Abhyankar break; 213967877ebaSShri Abhyankar } 21403ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 21419566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 21425cd7cf9dSHong Zhang 21432792810eSHong Zhang F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2144dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 214551d5961aSHong Zhang F->ops->solvetranspose = MatSolve_MUMPS; 21464e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 214723a5080aSHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2148b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 21494e34a73bSHong Zhang #if defined(PETSC_USE_COMPLEX) 21500298fd71SBarry Smith F->ops->getinertia = NULL; 21514e34a73bSHong Zhang #else 21524e34a73bSHong Zhang F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2153db4efbfdSBarry Smith #endif 2154d47f36abSHong Zhang 2155d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 21563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2157b24902e0SBarry Smith } 2158b24902e0SBarry Smith 2159d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2160d71ae5a4SJacob Faibussowitsch { 216164e6c443SBarry Smith PetscBool iascii; 216264e6c443SBarry Smith PetscViewerFormat format; 2163e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2164f6c57405SHong Zhang 2165f6c57405SHong Zhang PetscFunctionBegin; 216664e6c443SBarry Smith /* check if matrix is mumps type */ 21673ba16761SJacob Faibussowitsch if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 216864e6c443SBarry Smith 
21699566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 217064e6c443SBarry Smith if (iascii) { 21719566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 21721511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 21739566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 21741511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 21759566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 21769566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 21779566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 21789566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 21799566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 21809566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 21819566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 21829566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 21839566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 21849566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 21859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): 
%d\n", mumps->id.ICNTL(10))); 21869566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2187a5e57a09SHong Zhang if (mumps->id.ICNTL(11) > 0) { 21889566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 21899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 21909566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 21919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 21929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 21939566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2194f6c57405SHong Zhang } 21959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 21969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 21979566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 219845e3843bSPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2199f6c57405SHong Zhang /* ICNTL(15-17) not used */ 22009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 22019566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 22029566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 22039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 22049566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 22059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2206c0165424SHong Zhang 22079566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 22089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 22099566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 22109566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 22119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 22129566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 221342179a6aSHong Zhang 22149566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 22159566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 22169566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 22179566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 22189566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 22199566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2220f6c57405SHong Zhang 22219566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 22229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 22239566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 22249566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 22259566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 22269566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2227f6c57405SHong Zhang 2228a5b23f4aSJose E. 
Roman /* information local to each processor */ 22299566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 22309566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 22319566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 22329566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 22339566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 22349566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 22359566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 22369566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 22379566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 22389566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2239f6c57405SHong Zhang 22409566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 22419566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 22429566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2243f6c57405SHong Zhang 22449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 22459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 22469566063dSJacob Faibussowitsch 
PetscCall(PetscViewerFlush(viewer)); 2247f6c57405SHong Zhang 22489566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 22499566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 22509566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2251b34f08ffSHong Zhang 2252a0e18203SThibaut Appel if (mumps->ninfo && mumps->ninfo <= 80) { 2253b34f08ffSHong Zhang PetscInt i; 2254b34f08ffSHong Zhang for (i = 0; i < mumps->ninfo; i++) { 22559566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 22569566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 22579566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2258b34f08ffSHong Zhang } 2259b34f08ffSHong Zhang } 22609566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 22611511cd71SPierre Jolivet } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2262f6c57405SHong Zhang 22631511cd71SPierre Jolivet if (mumps->myid == 0) { /* information from the host */ 22649566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 22659566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 22669566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 22679566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2268f6c57405SHong Zhang 22699566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 22709566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 22719566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 22729566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 22739566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 22749566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 22759566063dSJacob 
Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 22769566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 22779566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 22789566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 22799566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 22809566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 22819566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 22829566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 22839566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 22849566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 22859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS 
internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 22869566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 22879566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 22889566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 22899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 22909566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 22919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 22929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 22939566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 22949566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 22959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 
22969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 22979566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 22989566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 22999566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 23009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 23019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 23029566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2303f6c57405SHong Zhang } 2304f6c57405SHong Zhang } 2305cb828f0fSHong Zhang } 23063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2307f6c57405SHong Zhang } 2308f6c57405SHong Zhang 2309d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2310d71ae5a4SJacob Faibussowitsch { 2311e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 231235bd34faSBarry Smith 231335bd34faSBarry Smith PetscFunctionBegin; 
231435bd34faSBarry Smith info->block_size = 1.0; 2315cb828f0fSHong Zhang info->nz_allocated = mumps->id.INFOG(20); 2316cb828f0fSHong Zhang info->nz_used = mumps->id.INFOG(20); 231735bd34faSBarry Smith info->nz_unneeded = 0.0; 231835bd34faSBarry Smith info->assemblies = 0.0; 231935bd34faSBarry Smith info->mallocs = 0.0; 232035bd34faSBarry Smith info->memory = 0.0; 232135bd34faSBarry Smith info->fill_ratio_given = 0; 232235bd34faSBarry Smith info->fill_ratio_needed = 0; 232335bd34faSBarry Smith info->factor_mallocs = 0; 23243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 232535bd34faSBarry Smith } 232635bd34faSBarry Smith 2327d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2328d71ae5a4SJacob Faibussowitsch { 2329e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2330a3d589ffSStefano Zampini const PetscScalar *arr; 23318e7ba810SStefano Zampini const PetscInt *idxs; 23328e7ba810SStefano Zampini PetscInt size, i; 23336444a565SStefano Zampini 23346444a565SStefano Zampini PetscFunctionBegin; 23359566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is, &size)); 2336b3cb21ddSStefano Zampini /* Schur complement matrix */ 23379566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F->schur)); 23389566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 23399566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2340a3d589ffSStefano Zampini mumps->id.schur = (MumpsScalar *)arr; 2341a3d589ffSStefano Zampini mumps->id.size_schur = size; 2342a3d589ffSStefano Zampini mumps->id.schur_lld = size; 23439566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 234448a46eb9SPierre Jolivet if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2345b3cb21ddSStefano Zampini 2346b3cb21ddSStefano Zampini /* MUMPS expects Fortran style indices */ 23479566063dSJacob Faibussowitsch 
PetscCall(PetscFree(mumps->id.listvar_schur)); 23489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 23499566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is, &idxs)); 23509566063dSJacob Faibussowitsch for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 23519566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is, &idxs)); 235259ac8732SStefano Zampini /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2353b5fa320bSStefano Zampini mumps->id.ICNTL(26) = -1; 23543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 23556444a565SStefano Zampini } 235659ac8732SStefano Zampini 2357d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2358d71ae5a4SJacob Faibussowitsch { 23596444a565SStefano Zampini Mat St; 2360e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 23616444a565SStefano Zampini PetscScalar *array; 23626444a565SStefano Zampini #if defined(PETSC_USE_COMPLEX) 23638ac429a0SStefano Zampini PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 23646444a565SStefano Zampini #endif 23656444a565SStefano Zampini 23666444a565SStefano Zampini PetscFunctionBegin; 236708401ef6SPierre Jolivet PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! 
You should call MatFactorSetSchurIS to enable it"); 23689566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 23699566063dSJacob Faibussowitsch PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 23709566063dSJacob Faibussowitsch PetscCall(MatSetType(St, MATDENSE)); 23719566063dSJacob Faibussowitsch PetscCall(MatSetUp(St)); 23729566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(St, &array)); 237359ac8732SStefano Zampini if (!mumps->sym) { /* MUMPS always return a full matrix */ 23746444a565SStefano Zampini if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 23756444a565SStefano Zampini PetscInt i, j, N = mumps->id.size_schur; 23766444a565SStefano Zampini for (i = 0; i < N; i++) { 23776444a565SStefano Zampini for (j = 0; j < N; j++) { 23786444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 23796444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j]; 23806444a565SStefano Zampini #else 23816444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 23826444a565SStefano Zampini #endif 23836444a565SStefano Zampini array[j * N + i] = val; 23846444a565SStefano Zampini } 23856444a565SStefano Zampini } 23866444a565SStefano Zampini } else { /* stored by columns */ 23879566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 23886444a565SStefano Zampini } 23896444a565SStefano Zampini } else { /* either full or lower-triangular (not packed) */ 23906444a565SStefano Zampini if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 23916444a565SStefano Zampini PetscInt i, j, N = mumps->id.size_schur; 23926444a565SStefano Zampini for (i = 0; i < N; i++) { 23936444a565SStefano Zampini for (j = i; j < N; j++) { 23946444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 23956444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j]; 
23966444a565SStefano Zampini #else 23976444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 23986444a565SStefano Zampini #endif 23996444a565SStefano Zampini array[i * N + j] = val; 24006444a565SStefano Zampini array[j * N + i] = val; 24016444a565SStefano Zampini } 24026444a565SStefano Zampini } 24036444a565SStefano Zampini } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 24049566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 24056444a565SStefano Zampini } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 24066444a565SStefano Zampini PetscInt i, j, N = mumps->id.size_schur; 24076444a565SStefano Zampini for (i = 0; i < N; i++) { 24086444a565SStefano Zampini for (j = 0; j < i + 1; j++) { 24096444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 24106444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j]; 24116444a565SStefano Zampini #else 24126444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 24136444a565SStefano Zampini #endif 24146444a565SStefano Zampini array[i * N + j] = val; 24156444a565SStefano Zampini array[j * N + i] = val; 24166444a565SStefano Zampini } 24176444a565SStefano Zampini } 24186444a565SStefano Zampini } 24196444a565SStefano Zampini } 24209566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(St, &array)); 24216444a565SStefano Zampini *S = St; 24223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24236444a565SStefano Zampini } 24246444a565SStefano Zampini 2425d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2426d71ae5a4SJacob Faibussowitsch { 2427e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 24285ccb76cbSHong Zhang 24295ccb76cbSHong Zhang PetscFunctionBegin; 2430413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { /* need to cache 
icntl and ival since PetscMUMPS_c() has never been called */ 2431413bcc21SPierre Jolivet PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 24329371c9d4SSatish Balay for (i = 0; i < nICNTL_pre; ++i) 24339371c9d4SSatish Balay if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2434413bcc21SPierre Jolivet if (i == nICNTL_pre) { /* not already cached */ 2435413bcc21SPierre Jolivet if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2436413bcc21SPierre Jolivet else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2437413bcc21SPierre Jolivet mumps->ICNTL_pre[0]++; 2438413bcc21SPierre Jolivet } 2439413bcc21SPierre Jolivet mumps->ICNTL_pre[1 + 2 * i] = icntl; 2440413bcc21SPierre Jolivet PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2441413bcc21SPierre Jolivet } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 24423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24435ccb76cbSHong Zhang } 24445ccb76cbSHong Zhang 2445d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2446d71ae5a4SJacob Faibussowitsch { 2447e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2448bc6112feSHong Zhang 2449bc6112feSHong Zhang PetscFunctionBegin; 245036df9881Sjeremy theler if (mumps->id.job == JOB_NULL) { 245136df9881Sjeremy theler PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 245236df9881Sjeremy theler *ival = 0; 245336df9881Sjeremy theler for (i = 0; i < nICNTL_pre; ++i) { 245436df9881Sjeremy theler if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 245536df9881Sjeremy theler } 245636df9881Sjeremy theler } else *ival = mumps->id.ICNTL(icntl); 24573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2458bc6112feSHong Zhang } 2459bc6112feSHong Zhang 24605ccb76cbSHong Zhang /*@ 24615ccb76cbSHong Zhang MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 24625ccb76cbSHong Zhang 2463c3339decSBarry Smith Logically Collective 24645ccb76cbSHong Zhang 24655ccb76cbSHong Zhang Input Parameters: 246611a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 24675ccb76cbSHong Zhang . icntl - index of MUMPS parameter array ICNTL() 24685ccb76cbSHong Zhang - ival - value of MUMPS ICNTL(icntl) 24695ccb76cbSHong Zhang 24703c7db156SBarry Smith Options Database Key: 2471147403d9SBarry Smith . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 24725ccb76cbSHong Zhang 24735ccb76cbSHong Zhang Level: beginner 24745ccb76cbSHong Zhang 247596a0c994SBarry Smith References: 2476606c0280SSatish Balay . 
* - MUMPS Users' Guide 24775ccb76cbSHong Zhang 24781cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 24795ccb76cbSHong Zhang @*/ 2480d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2481d71ae5a4SJacob Faibussowitsch { 24825ccb76cbSHong Zhang PetscFunctionBegin; 24832989dfd4SHong Zhang PetscValidType(F, 1); 248428b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 24855ccb76cbSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 24865ccb76cbSHong Zhang PetscValidLogicalCollectiveInt(F, ival, 3); 2487413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2488cac4c232SBarry Smith PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 24893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24905ccb76cbSHong Zhang } 24915ccb76cbSHong Zhang 2492a21f80fcSHong Zhang /*@ 2493a21f80fcSHong Zhang MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2494a21f80fcSHong Zhang 2495c3339decSBarry Smith Logically Collective 2496a21f80fcSHong Zhang 2497a21f80fcSHong Zhang Input Parameters: 249811a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2499a21f80fcSHong Zhang - icntl - index of MUMPS parameter array ICNTL() 2500a21f80fcSHong Zhang 2501a21f80fcSHong Zhang Output Parameter: 2502a21f80fcSHong Zhang . ival - value of MUMPS ICNTL(icntl) 2503a21f80fcSHong Zhang 2504a21f80fcSHong Zhang Level: beginner 2505a21f80fcSHong Zhang 250696a0c994SBarry Smith References: 2507606c0280SSatish Balay . 
* - MUMPS Users' Guide 2508a21f80fcSHong Zhang 25091cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2510a21f80fcSHong Zhang @*/ 2511d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2512d71ae5a4SJacob Faibussowitsch { 2513bc6112feSHong Zhang PetscFunctionBegin; 25142989dfd4SHong Zhang PetscValidType(F, 1); 251528b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2516bc6112feSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 2517bc6112feSHong Zhang PetscValidIntPointer(ival, 3); 2518413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2519cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 25203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2521bc6112feSHong Zhang } 2522bc6112feSHong Zhang 2523d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2524d71ae5a4SJacob Faibussowitsch { 2525e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 25268928b65cSHong Zhang 25278928b65cSHong Zhang PetscFunctionBegin; 2528413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { 2529413bcc21SPierre Jolivet PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 25309371c9d4SSatish Balay for (i = 0; i < nCNTL_pre; ++i) 25319371c9d4SSatish Balay if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2532413bcc21SPierre Jolivet if (i == nCNTL_pre) { 2533413bcc21SPierre Jolivet if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2534413bcc21SPierre Jolivet else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2535413bcc21SPierre Jolivet mumps->CNTL_pre[0]++; 2536413bcc21SPierre Jolivet } 2537413bcc21SPierre Jolivet mumps->CNTL_pre[1 + 2 * i] = icntl; 2538413bcc21SPierre Jolivet mumps->CNTL_pre[2 + 2 * i] = val; 2539413bcc21SPierre Jolivet } else mumps->id.CNTL(icntl) = val; 25403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25418928b65cSHong Zhang } 25428928b65cSHong Zhang 2543d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2544d71ae5a4SJacob Faibussowitsch { 2545e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2546bc6112feSHong Zhang 2547bc6112feSHong Zhang PetscFunctionBegin; 254836df9881Sjeremy theler if (mumps->id.job == JOB_NULL) { 254936df9881Sjeremy theler PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 255036df9881Sjeremy theler *val = 0.0; 255136df9881Sjeremy theler for (i = 0; i < nCNTL_pre; ++i) { 255236df9881Sjeremy theler if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 255336df9881Sjeremy theler } 255436df9881Sjeremy theler } else *val = mumps->id.CNTL(icntl); 25553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2556bc6112feSHong Zhang } 2557bc6112feSHong Zhang 25588928b65cSHong Zhang /*@ 25598928b65cSHong Zhang MatMumpsSetCntl - Set MUMPS parameter CNTL() 25608928b65cSHong Zhang 2561c3339decSBarry Smith Logically Collective 25628928b65cSHong Zhang 25638928b65cSHong Zhang Input Parameters: 256411a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 25658928b65cSHong Zhang . icntl - index of MUMPS parameter array CNTL() 25668928b65cSHong Zhang - val - value of MUMPS CNTL(icntl) 25678928b65cSHong Zhang 25683c7db156SBarry Smith Options Database Key: 2569147403d9SBarry Smith . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 25708928b65cSHong Zhang 25718928b65cSHong Zhang Level: beginner 25728928b65cSHong Zhang 257396a0c994SBarry Smith References: 2574606c0280SSatish Balay . 
* - MUMPS Users' Guide 25758928b65cSHong Zhang 25761cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 25778928b65cSHong Zhang @*/ 2578d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2579d71ae5a4SJacob Faibussowitsch { 25808928b65cSHong Zhang PetscFunctionBegin; 25812989dfd4SHong Zhang PetscValidType(F, 1); 258228b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 25838928b65cSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 2584bc6112feSHong Zhang PetscValidLogicalCollectiveReal(F, val, 3); 2585413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2586cac4c232SBarry Smith PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 25873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25888928b65cSHong Zhang } 25898928b65cSHong Zhang 2590a21f80fcSHong Zhang /*@ 2591a21f80fcSHong Zhang MatMumpsGetCntl - Get MUMPS parameter CNTL() 2592a21f80fcSHong Zhang 2593c3339decSBarry Smith Logically Collective 2594a21f80fcSHong Zhang 2595a21f80fcSHong Zhang Input Parameters: 259611a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2597a21f80fcSHong Zhang - icntl - index of MUMPS parameter array CNTL() 2598a21f80fcSHong Zhang 2599a21f80fcSHong Zhang Output Parameter: 2600a21f80fcSHong Zhang . val - value of MUMPS CNTL(icntl) 2601a21f80fcSHong Zhang 2602a21f80fcSHong Zhang Level: beginner 2603a21f80fcSHong Zhang 260496a0c994SBarry Smith References: 2605606c0280SSatish Balay . 
* - MUMPS Users' Guide 2606a21f80fcSHong Zhang 26071cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2608a21f80fcSHong Zhang @*/ 2609d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2610d71ae5a4SJacob Faibussowitsch { 2611bc6112feSHong Zhang PetscFunctionBegin; 26122989dfd4SHong Zhang PetscValidType(F, 1); 261328b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2614bc6112feSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 2615bc6112feSHong Zhang PetscValidRealPointer(val, 3); 2616413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2617cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 26183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2619bc6112feSHong Zhang } 2620bc6112feSHong Zhang 2621d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2622d71ae5a4SJacob Faibussowitsch { 2623e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2624bc6112feSHong Zhang 2625bc6112feSHong Zhang PetscFunctionBegin; 2626bc6112feSHong Zhang *info = mumps->id.INFO(icntl); 26273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2628bc6112feSHong Zhang } 2629bc6112feSHong Zhang 2630d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2631d71ae5a4SJacob Faibussowitsch { 2632e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2633bc6112feSHong Zhang 2634bc6112feSHong Zhang PetscFunctionBegin; 2635bc6112feSHong Zhang *infog = mumps->id.INFOG(icntl); 
26363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2637bc6112feSHong Zhang } 2638bc6112feSHong Zhang 2639d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2640d71ae5a4SJacob Faibussowitsch { 2641e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2642bc6112feSHong Zhang 2643bc6112feSHong Zhang PetscFunctionBegin; 2644bc6112feSHong Zhang *rinfo = mumps->id.RINFO(icntl); 26453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2646bc6112feSHong Zhang } 2647bc6112feSHong Zhang 2648d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2649d71ae5a4SJacob Faibussowitsch { 2650e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2651bc6112feSHong Zhang 2652bc6112feSHong Zhang PetscFunctionBegin; 2653bc6112feSHong Zhang *rinfog = mumps->id.RINFOG(icntl); 26543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2655bc6112feSHong Zhang } 2656bc6112feSHong Zhang 26575c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 26585c0bae8cSAshish Patel { 26595c0bae8cSAshish Patel Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 26605c0bae8cSAshish Patel 26615c0bae8cSAshish Patel PetscFunctionBegin; 26625c0bae8cSAshish Patel PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 26635c0bae8cSAshish Patel *size = 0; 26645c0bae8cSAshish Patel *array = NULL; 26655c0bae8cSAshish Patel if (!mumps->myid) { 26665c0bae8cSAshish Patel *size = mumps->id.INFOG(28); 26675c0bae8cSAshish Patel PetscCall(PetscMalloc1(*size, array)); 26685c0bae8cSAshish Patel for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 26695c0bae8cSAshish Patel } 26705c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 26715c0bae8cSAshish Patel } 26725c0bae8cSAshish Patel 
2673d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2674d71ae5a4SJacob Faibussowitsch { 26750e6b8875SHong Zhang Mat Bt = NULL, Btseq = NULL; 26760e6b8875SHong Zhang PetscBool flg; 2677bb599dfdSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2678bb599dfdSHong Zhang PetscScalar *aa; 2679f410b75aSHong Zhang PetscInt spnr, *ia, *ja, M, nrhs; 2680bb599dfdSHong Zhang 2681bb599dfdSHong Zhang PetscFunctionBegin; 2682064a246eSJacob Faibussowitsch PetscValidPointer(spRHS, 2); 2683013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 26840e6b8875SHong Zhang if (flg) { 26859566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2686013e2dc7SBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2687bb599dfdSHong Zhang 26889566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2689bb599dfdSHong Zhang 26902d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 26910e6b8875SHong Zhang Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 26920e6b8875SHong Zhang Btseq = b->A; 26930e6b8875SHong Zhang } else { 26940e6b8875SHong Zhang Btseq = Bt; 26950e6b8875SHong Zhang } 26960e6b8875SHong Zhang 26979566063dSJacob Faibussowitsch PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2698f410b75aSHong Zhang mumps->id.nrhs = nrhs; 2699f410b75aSHong Zhang mumps->id.lrhs = M; 2700f410b75aSHong Zhang mumps->id.rhs = NULL; 2701f410b75aSHong Zhang 2702e3f2db6aSHong Zhang if (!mumps->myid) { 27039566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 27049566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 270528b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 27069566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, 
&mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2707bb599dfdSHong Zhang mumps->id.rhs_sparse = (MumpsScalar *)aa; 2708e3f2db6aSHong Zhang } else { 2709e3f2db6aSHong Zhang mumps->id.irhs_ptr = NULL; 2710e3f2db6aSHong Zhang mumps->id.irhs_sparse = NULL; 2711e3f2db6aSHong Zhang mumps->id.nz_rhs = 0; 2712e3f2db6aSHong Zhang mumps->id.rhs_sparse = NULL; 2713e3f2db6aSHong Zhang } 2714bb599dfdSHong Zhang mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2715e3f2db6aSHong Zhang mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2716bb599dfdSHong Zhang 2717bb599dfdSHong Zhang /* solve phase */ 2718bb599dfdSHong Zhang mumps->id.job = JOB_SOLVE; 27193ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 2720049d1499SBarry Smith PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 272114267174SHong Zhang 2722e3f2db6aSHong Zhang if (!mumps->myid) { 27239566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 27249566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 272528b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2726e3f2db6aSHong Zhang } 27273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2728bb599dfdSHong Zhang } 2729bb599dfdSHong Zhang 2730bb599dfdSHong Zhang /*@ 27312ef1f0ffSBarry Smith MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2732bb599dfdSHong Zhang 2733c3339decSBarry Smith Logically Collective 2734bb599dfdSHong Zhang 273520f4b53cSBarry Smith Input Parameter: 273620f4b53cSBarry Smith . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2737bb599dfdSHong Zhang 2738bb599dfdSHong Zhang Output Parameter: 273920f4b53cSBarry Smith . 
spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2740bb599dfdSHong Zhang 2741bb599dfdSHong Zhang Level: beginner 2742bb599dfdSHong Zhang 2743bb599dfdSHong Zhang References: 2744606c0280SSatish Balay . * - MUMPS Users' Guide 2745bb599dfdSHong Zhang 27461cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2747bb599dfdSHong Zhang @*/ 2748d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2749d71ae5a4SJacob Faibussowitsch { 2750bb599dfdSHong Zhang PetscFunctionBegin; 2751bb599dfdSHong Zhang PetscValidType(F, 1); 275228b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2753cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 27543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2755bb599dfdSHong Zhang } 2756bb599dfdSHong Zhang 2757d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2758d71ae5a4SJacob Faibussowitsch { 27590e6b8875SHong Zhang Mat spRHS; 27600e6b8875SHong Zhang 27610e6b8875SHong Zhang PetscFunctionBegin; 27629566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(spRHST, &spRHS)); 27639566063dSJacob Faibussowitsch PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 27649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&spRHS)); 27653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27660e6b8875SHong Zhang } 27670e6b8875SHong Zhang 27680e6b8875SHong Zhang /*@ 27692ef1f0ffSBarry Smith MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 27700e6b8875SHong Zhang 2771c3339decSBarry Smith Logically Collective 27720e6b8875SHong Zhang 277320f4b53cSBarry Smith Input Parameter: 277420f4b53cSBarry Smith . 
F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 27750e6b8875SHong Zhang 27760e6b8875SHong Zhang Output Parameter: 277720f4b53cSBarry Smith . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 27780e6b8875SHong Zhang 27790e6b8875SHong Zhang Level: beginner 27800e6b8875SHong Zhang 27810e6b8875SHong Zhang References: 2782606c0280SSatish Balay . * - MUMPS Users' Guide 27830e6b8875SHong Zhang 27841cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 27850e6b8875SHong Zhang @*/ 2786d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 2787d71ae5a4SJacob Faibussowitsch { 27880e6b8875SHong Zhang PetscBool flg; 27890e6b8875SHong Zhang 27900e6b8875SHong Zhang PetscFunctionBegin; 27910e6b8875SHong Zhang PetscValidType(F, 1); 279228b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 27939566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 279428b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 27950e6b8875SHong Zhang 2796cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 27973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27980e6b8875SHong Zhang } 27990e6b8875SHong Zhang 2800a21f80fcSHong Zhang /*@ 2801a21f80fcSHong Zhang MatMumpsGetInfo - Get MUMPS parameter INFO() 2802a21f80fcSHong Zhang 2803c3339decSBarry Smith Logically Collective 2804a21f80fcSHong Zhang 2805a21f80fcSHong Zhang Input Parameters: 280611a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2807a21f80fcSHong Zhang - icntl - index of MUMPS 
parameter array INFO() 2808a21f80fcSHong Zhang 2809a21f80fcSHong Zhang Output Parameter: 2810a21f80fcSHong Zhang . ival - value of MUMPS INFO(icntl) 2811a21f80fcSHong Zhang 2812a21f80fcSHong Zhang Level: beginner 2813a21f80fcSHong Zhang 281496a0c994SBarry Smith References: 2815606c0280SSatish Balay . * - MUMPS Users' Guide 2816a21f80fcSHong Zhang 28171cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2818a21f80fcSHong Zhang @*/ 2819d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 2820d71ae5a4SJacob Faibussowitsch { 2821bc6112feSHong Zhang PetscFunctionBegin; 28222989dfd4SHong Zhang PetscValidType(F, 1); 282328b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2824ca810319SHong Zhang PetscValidIntPointer(ival, 3); 2825cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 28263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2827bc6112feSHong Zhang } 2828bc6112feSHong Zhang 2829a21f80fcSHong Zhang /*@ 2830a21f80fcSHong Zhang MatMumpsGetInfog - Get MUMPS parameter INFOG() 2831a21f80fcSHong Zhang 2832c3339decSBarry Smith Logically Collective 2833a21f80fcSHong Zhang 2834a21f80fcSHong Zhang Input Parameters: 283511a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2836a21f80fcSHong Zhang - icntl - index of MUMPS parameter array INFOG() 2837a21f80fcSHong Zhang 2838a21f80fcSHong Zhang Output Parameter: 2839a21f80fcSHong Zhang . ival - value of MUMPS INFOG(icntl) 2840a21f80fcSHong Zhang 2841a21f80fcSHong Zhang Level: beginner 2842a21f80fcSHong Zhang 284396a0c994SBarry Smith References: 2844606c0280SSatish Balay . 
* - MUMPS Users' Guide 2845a21f80fcSHong Zhang 28461cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2847a21f80fcSHong Zhang @*/ 2848d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 2849d71ae5a4SJacob Faibussowitsch { 2850bc6112feSHong Zhang PetscFunctionBegin; 28512989dfd4SHong Zhang PetscValidType(F, 1); 285228b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2853ca810319SHong Zhang PetscValidIntPointer(ival, 3); 2854cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 28553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2856bc6112feSHong Zhang } 2857bc6112feSHong Zhang 2858a21f80fcSHong Zhang /*@ 2859a21f80fcSHong Zhang MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2860a21f80fcSHong Zhang 2861c3339decSBarry Smith Logically Collective 2862a21f80fcSHong Zhang 2863a21f80fcSHong Zhang Input Parameters: 286411a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2865a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFO() 2866a21f80fcSHong Zhang 2867a21f80fcSHong Zhang Output Parameter: 2868a21f80fcSHong Zhang . val - value of MUMPS RINFO(icntl) 2869a21f80fcSHong Zhang 2870a21f80fcSHong Zhang Level: beginner 2871a21f80fcSHong Zhang 287296a0c994SBarry Smith References: 2873606c0280SSatish Balay . 
* - MUMPS Users' Guide 2874a21f80fcSHong Zhang 28751cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 2876a21f80fcSHong Zhang @*/ 2877d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 2878d71ae5a4SJacob Faibussowitsch { 2879bc6112feSHong Zhang PetscFunctionBegin; 28802989dfd4SHong Zhang PetscValidType(F, 1); 288128b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2882bc6112feSHong Zhang PetscValidRealPointer(val, 3); 2883cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 28843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2885bc6112feSHong Zhang } 2886bc6112feSHong Zhang 2887a21f80fcSHong Zhang /*@ 2888a21f80fcSHong Zhang MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2889a21f80fcSHong Zhang 2890c3339decSBarry Smith Logically Collective 2891a21f80fcSHong Zhang 2892a21f80fcSHong Zhang Input Parameters: 289311a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2894a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFOG() 2895a21f80fcSHong Zhang 2896a21f80fcSHong Zhang Output Parameter: 2897a21f80fcSHong Zhang . val - value of MUMPS RINFOG(icntl) 2898a21f80fcSHong Zhang 2899a21f80fcSHong Zhang Level: beginner 2900a21f80fcSHong Zhang 290196a0c994SBarry Smith References: 2902606c0280SSatish Balay . 
* - MUMPS Users' Guide 2903a21f80fcSHong Zhang 29041cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 2905a21f80fcSHong Zhang @*/ 2906d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 2907d71ae5a4SJacob Faibussowitsch { 2908bc6112feSHong Zhang PetscFunctionBegin; 29092989dfd4SHong Zhang PetscValidType(F, 1); 291028b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2911bc6112feSHong Zhang PetscValidRealPointer(val, 3); 2912cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 29133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2914bc6112feSHong Zhang } 2915bc6112feSHong Zhang 29165c0bae8cSAshish Patel /*@ 29175c0bae8cSAshish Patel MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 29185c0bae8cSAshish Patel 29195c0bae8cSAshish Patel Logically Collective 29205c0bae8cSAshish Patel 29215c0bae8cSAshish Patel Input Parameter: 29225c0bae8cSAshish Patel . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 29235c0bae8cSAshish Patel 29245c0bae8cSAshish Patel Output Parameters: 29255c0bae8cSAshish Patel + size - local size of the array. The size of the array is non-zero only on the host. 29265c0bae8cSAshish Patel - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 29275c0bae8cSAshish Patel for freeing this array. 29285c0bae8cSAshish Patel 29295c0bae8cSAshish Patel Level: beginner 29305c0bae8cSAshish Patel 29315c0bae8cSAshish Patel References: 29325c0bae8cSAshish Patel . 
* - MUMPS Users' Guide 29335c0bae8cSAshish Patel 29341cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 29355c0bae8cSAshish Patel @*/ 29365c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 29375c0bae8cSAshish Patel { 29385c0bae8cSAshish Patel PetscFunctionBegin; 29395c0bae8cSAshish Patel PetscValidType(F, 1); 29405c0bae8cSAshish Patel PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 29415c0bae8cSAshish Patel PetscValidIntPointer(size, 3); 29425c0bae8cSAshish Patel PetscValidPointer(array, 4); 29435c0bae8cSAshish Patel PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 29445c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 29455c0bae8cSAshish Patel } 29465c0bae8cSAshish Patel 294724b6179bSKris Buschelman /*MC 29482692d6eeSBarry Smith MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 294924b6179bSKris Buschelman distributed and sequential matrices via the external package MUMPS. 295024b6179bSKris Buschelman 295111a5261eSBarry Smith Works with `MATAIJ` and `MATSBAIJ` matrices 295224b6179bSKris Buschelman 2953c2b89b5dSBarry Smith Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2954c2b89b5dSBarry Smith 29552ef1f0ffSBarry Smith Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 29562ef1f0ffSBarry Smith See details below. 
2957217d3b1eSJunchao Zhang 29582ef1f0ffSBarry Smith Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 2959c2b89b5dSBarry Smith 296024b6179bSKris Buschelman Options Database Keys: 29614422a9fcSPatrick Sanan + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 29624422a9fcSPatrick Sanan . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 29634422a9fcSPatrick Sanan . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 29644422a9fcSPatrick Sanan . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 29654422a9fcSPatrick Sanan . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2966b53c1a7fSBarry Smith . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2967b53c1a7fSBarry Smith Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 29684422a9fcSPatrick Sanan . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 29694422a9fcSPatrick Sanan . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 29704422a9fcSPatrick Sanan . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 29714422a9fcSPatrick Sanan . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 29724422a9fcSPatrick Sanan . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 29734422a9fcSPatrick Sanan . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 297445e3843bSPierre Jolivet . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 29754422a9fcSPatrick Sanan . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 297625aac85cSJunchao Zhang . 
-mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
. -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
. -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
. -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
. -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
. -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
. -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
. -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
. -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
. -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
. -mat_mumps_icntl_33 - ICNTL(33): compute determinant
. -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
. -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
. -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
. -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
. -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
. -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
. -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.
-mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2995a0e18203SThibaut Appel . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2996217d3b1eSJunchao Zhang - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2997217d3b1eSJunchao Zhang Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 299824b6179bSKris Buschelman 299924b6179bSKris Buschelman Level: beginner 300024b6179bSKris Buschelman 300195452b02SPatrick Sanan Notes: 30022ef1f0ffSBarry Smith MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 30032ef1f0ffSBarry Smith error if the matrix is Hermitian. 300438548759SBarry Smith 300526cc229bSBarry Smith When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 300626cc229bSBarry Smith `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 300726cc229bSBarry Smith 30082ef1f0ffSBarry Smith When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 30092ef1f0ffSBarry Smith the failure with 30102ef1f0ffSBarry Smith .vb 30112ef1f0ffSBarry Smith KSPGetPC(ksp,&pc); 30122ef1f0ffSBarry Smith PCFactorGetMatrix(pc,&mat); 30132ef1f0ffSBarry Smith MatMumpsGetInfo(mat,....); 30142ef1f0ffSBarry Smith MatMumpsGetInfog(mat,....); etc. 30152ef1f0ffSBarry Smith .ve 30162ef1f0ffSBarry Smith Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 

  MUMPS provides 64-bit integer support in two build modes:
  full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
  requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

  selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
  MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and
  columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
  integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

  Two modes to run MUMPS/PETSc with OpenMP
.vb
  Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
  threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
.ve

.vb
  -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores.
For example,
  if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
.ve

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
  (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
  (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
  libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
  (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

  If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
  processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
  size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
  are called slave ranks (or slaves). Only master ranks are visible to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
  by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
  In a multi-socket compute node, MPI rank mapping is an issue.
Still use the above example and suppose your compute node has two sockets, 3052217d3b1eSJunchao Zhang if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3053217d3b1eSJunchao Zhang MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3054217d3b1eSJunchao Zhang cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3055217d3b1eSJunchao Zhang problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 30568fcaa860SBarry Smith For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3057217d3b1eSJunchao Zhang examine the mapping result. 3058217d3b1eSJunchao Zhang 305911a5261eSBarry Smith PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 306011a5261eSBarry Smith for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 306111a5261eSBarry Smith calls `omp_set_num_threads`(m) internally before calling MUMPS. 3062217d3b1eSJunchao Zhang 3063217d3b1eSJunchao Zhang References: 3064606c0280SSatish Balay + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3065606c0280SSatish Balay - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 
3066217d3b1eSJunchao Zhang 30671cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 306824b6179bSKris Buschelman M*/ 306924b6179bSKris Buschelman 3070d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3071d71ae5a4SJacob Faibussowitsch { 307235bd34faSBarry Smith PetscFunctionBegin; 30732692d6eeSBarry Smith *type = MATSOLVERMUMPS; 30743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 307535bd34faSBarry Smith } 307635bd34faSBarry Smith 3077bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI AIJ matrices */ 3078d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3079d71ae5a4SJacob Faibussowitsch { 30802877fffaSHong Zhang Mat B; 30812877fffaSHong Zhang Mat_MUMPS *mumps; 3082ace3abfcSBarry Smith PetscBool isSeqAIJ; 30832c7c0729SBarry Smith PetscMPIInt size; 30842877fffaSHong Zhang 30852877fffaSHong Zhang PetscFunctionBegin; 3086eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 3087b94d7dedSBarry Smith PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3088eb1ec7c1SStefano Zampini #endif 30892877fffaSHong Zhang /* Create the factorization matrix */ 30909566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 30919566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 30929566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 30939566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", 
&((PetscObject)B)->type_name)); 30949566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 30952877fffaSHong Zhang 30964dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 30972205254eSKarl Rupp 30982877fffaSHong Zhang B->ops->view = MatView_MUMPS; 309935bd34faSBarry Smith B->ops->getinfo = MatGetInfo_MUMPS; 31002205254eSKarl Rupp 31019566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 31029566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 31039566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 31049566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 31059566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 31069566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 31079566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 31089566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 31099566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 31109566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 31119566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 31125c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", 
MatMumpsGetNullPivots_MUMPS)); 31139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 31149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 31156444a565SStefano Zampini 3116450b117fSShri Abhyankar if (ftype == MAT_FACTOR_LU) { 3117450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3118d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 3119bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3120bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 31219566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3122746480a1SHong Zhang mumps->sym = 0; 3123dcd589f8SShri Abhyankar } else { 312467877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3125450b117fSShri Abhyankar B->factortype = MAT_FACTOR_CHOLESKY; 3126bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3127bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 31289566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 312959ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 313059ac8732SStefano Zampini mumps->sym = 2; 313159ac8732SStefano Zampini #else 3132b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 31336fdc2a6dSBarry Smith else mumps->sym = 2; 313459ac8732SStefano Zampini #endif 3135450b117fSShri Abhyankar } 31362877fffaSHong Zhang 313700c67f3bSHong Zhang /* set solvertype */ 31389566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 31399566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, 
&B->solvertype)); 31409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 31412c7c0729SBarry Smith if (size == 1) { 31424ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3143f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 31442c7c0729SBarry Smith } 31452877fffaSHong Zhang B->ops->destroy = MatDestroy_MUMPS; 3146e69c285eSBarry Smith B->data = (void *)mumps; 31472205254eSKarl Rupp 31482877fffaSHong Zhang *F = B; 3149413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3150413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3151413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3152d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 31533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31542877fffaSHong Zhang } 31552877fffaSHong Zhang 3156bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3157d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3158d71ae5a4SJacob Faibussowitsch { 31592877fffaSHong Zhang Mat B; 31602877fffaSHong Zhang Mat_MUMPS *mumps; 3161ace3abfcSBarry Smith PetscBool isSeqSBAIJ; 31622c7c0729SBarry Smith PetscMPIInt size; 31632877fffaSHong Zhang 31642877fffaSHong Zhang PetscFunctionBegin; 3165eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 3166b94d7dedSBarry Smith PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3167eb1ec7c1SStefano Zampini #endif 31689566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 31699566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 31709566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 31719566063dSJacob Faibussowitsch 
PetscCall(MatSetUp(B)); 3172e69c285eSBarry Smith 31734dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 31749566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3175bccb9932SShri Abhyankar if (isSeqSBAIJ) { 317616ebf90aSShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3177dcd589f8SShri Abhyankar } else { 3178bccb9932SShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3179bccb9932SShri Abhyankar } 3180bccb9932SShri Abhyankar 318167877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3182bccb9932SShri Abhyankar B->ops->view = MatView_MUMPS; 3183722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 31842205254eSKarl Rupp 31859566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 31869566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 31879566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 31889566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 31899566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 31909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 31919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 31929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 31939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 31949566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 31959566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 31965c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 31979566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 31989566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 31992205254eSKarl Rupp 3200f4762488SHong Zhang B->factortype = MAT_FACTOR_CHOLESKY; 320159ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 320259ac8732SStefano Zampini mumps->sym = 2; 320359ac8732SStefano Zampini #else 3204b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 32056fdc2a6dSBarry Smith else mumps->sym = 2; 320659ac8732SStefano Zampini #endif 3207a214ac2aSShri Abhyankar 320800c67f3bSHong Zhang /* set solvertype */ 32099566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 32109566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 32119566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 32122c7c0729SBarry Smith if (size == 1) { 32134ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3214f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 32152c7c0729SBarry Smith } 32169566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3217f3c0ef26SHong Zhang B->ops->destroy = MatDestroy_MUMPS; 3218e69c285eSBarry Smith B->data = (void *)mumps; 
32192205254eSKarl Rupp 32202877fffaSHong Zhang *F = B; 3221413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3222413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3223413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3224d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 32253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 32262877fffaSHong Zhang } 322797969023SHong Zhang 3228d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3229d71ae5a4SJacob Faibussowitsch { 323067877ebaSShri Abhyankar Mat B; 323167877ebaSShri Abhyankar Mat_MUMPS *mumps; 3232ace3abfcSBarry Smith PetscBool isSeqBAIJ; 32332c7c0729SBarry Smith PetscMPIInt size; 323467877ebaSShri Abhyankar 323567877ebaSShri Abhyankar PetscFunctionBegin; 323667877ebaSShri Abhyankar /* Create the factorization matrix */ 32379566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 32389566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 32399566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 32409566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 32419566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3242450b117fSShri Abhyankar 32434dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 3244450b117fSShri Abhyankar if (ftype == MAT_FACTOR_LU) { 3245450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3246450b117fSShri Abhyankar B->factortype = MAT_FACTOR_LU; 3247bccb9932SShri Abhyankar if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3248bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3249746480a1SHong Zhang mumps->sym = 0; 32509566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char 
**)&B->preferredordering[MAT_FACTOR_LU])); 3251546078acSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3252bccb9932SShri Abhyankar 3253450b117fSShri Abhyankar B->ops->view = MatView_MUMPS; 3254722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 32552205254eSKarl Rupp 32569566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 32579566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 32589566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 32599566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 32609566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 32619566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 32629566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 32639566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 32649566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 32659566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 32669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 32675c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 32689566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 32699566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3270450b117fSShri Abhyankar 327100c67f3bSHong Zhang /* set solvertype */ 32729566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 32739566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 32749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 32752c7c0729SBarry Smith if (size == 1) { 32764ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3277f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 32782c7c0729SBarry Smith } 32797ee00b23SStefano Zampini B->ops->destroy = MatDestroy_MUMPS; 32807ee00b23SStefano Zampini B->data = (void *)mumps; 32817ee00b23SStefano Zampini 32827ee00b23SStefano Zampini *F = B; 3283413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3284413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3285413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3286d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 32873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 32887ee00b23SStefano Zampini } 32897ee00b23SStefano Zampini 32907ee00b23SStefano Zampini /* MatGetFactor for Seq and MPI SELL matrices */ 3291d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3292d71ae5a4SJacob Faibussowitsch { 32937ee00b23SStefano Zampini Mat B; 32947ee00b23SStefano Zampini Mat_MUMPS *mumps; 32957ee00b23SStefano Zampini PetscBool isSeqSELL; 32962c7c0729SBarry Smith PetscMPIInt size; 32977ee00b23SStefano Zampini 32987ee00b23SStefano Zampini PetscFunctionBegin; 
32997ee00b23SStefano Zampini /* Create the factorization matrix */ 33009566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 33019566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 33029566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 33039566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 33049566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 33057ee00b23SStefano Zampini 33064dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 33077ee00b23SStefano Zampini 33087ee00b23SStefano Zampini B->ops->view = MatView_MUMPS; 33097ee00b23SStefano Zampini B->ops->getinfo = MatGetInfo_MUMPS; 33107ee00b23SStefano Zampini 33119566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 33129566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 33139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 33149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 33159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 33169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 33179566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 33189566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 33199566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 33209566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 33219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 33225c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 33237ee00b23SStefano Zampini 33247ee00b23SStefano Zampini if (ftype == MAT_FACTOR_LU) { 33257ee00b23SStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 33267ee00b23SStefano Zampini B->factortype = MAT_FACTOR_LU; 33277ee00b23SStefano Zampini if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 33287ee00b23SStefano Zampini else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 33297ee00b23SStefano Zampini mumps->sym = 0; 33309566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 33317ee00b23SStefano Zampini } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 33327ee00b23SStefano Zampini 33337ee00b23SStefano Zampini /* set solvertype */ 33349566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 33359566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 33369566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 33372c7c0729SBarry Smith if (size == 1) { 33384ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3339f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 33402c7c0729SBarry Smith } 3341450b117fSShri Abhyankar B->ops->destroy = MatDestroy_MUMPS; 3342e69c285eSBarry Smith B->data = (void *)mumps; 33432205254eSKarl 
Rupp 3344450b117fSShri Abhyankar *F = B; 3345413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3346413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3347413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3348d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 33493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3350450b117fSShri Abhyankar } 335142c9c57cSBarry Smith 3352d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3353d71ae5a4SJacob Faibussowitsch { 335442c9c57cSBarry Smith PetscFunctionBegin; 33559566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 33569566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 33579566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 33589566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 33599566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 33609566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 33619566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 33629566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 33639566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 33649566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 33659566063dSJacob Faibussowitsch 
PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 33663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 336742c9c57cSBarry Smith } 3368