11c2a3de1SBarry Smith 2397b6df1SKris Buschelman /* 3c2b5dc30SHong Zhang Provides an interface to the MUMPS sparse solver 4397b6df1SKris Buschelman */ 567602552SJunchao Zhang #include <petscpkg_version.h> 6*9d0448ceSStefano Zampini #include <petscsf.h> 7c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 8c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 97ee00b23SStefano Zampini #include <../src/mat/impls/sell/mpi/mpisell.h> 10397b6df1SKris Buschelman 11397b6df1SKris Buschelman EXTERN_C_BEGIN 12397b6df1SKris Buschelman #if defined(PETSC_USE_COMPLEX) 132907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 142907cef9SHong Zhang #include <cmumps_c.h> 152907cef9SHong Zhang #else 16c6db04a5SJed Brown #include <zmumps_c.h> 172907cef9SHong Zhang #endif 182907cef9SHong Zhang #else 192907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 202907cef9SHong Zhang #include <smumps_c.h> 21397b6df1SKris Buschelman #else 22c6db04a5SJed Brown #include <dmumps_c.h> 23397b6df1SKris Buschelman #endif 242907cef9SHong Zhang #endif 25397b6df1SKris Buschelman EXTERN_C_END 26397b6df1SKris Buschelman #define JOB_INIT -1 27413bcc21SPierre Jolivet #define JOB_NULL 0 283d472b54SHong Zhang #define JOB_FACTSYMBOLIC 1 293d472b54SHong Zhang #define JOB_FACTNUMERIC 2 303d472b54SHong Zhang #define JOB_SOLVE 3 31397b6df1SKris Buschelman #define JOB_END -2 323d472b54SHong Zhang 332907cef9SHong Zhang /* calls to MUMPS */ 342907cef9SHong Zhang #if defined(PETSC_USE_COMPLEX) 352907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 363ab56b82SJunchao Zhang #define MUMPS_c cmumps_c 372907cef9SHong Zhang #else 383ab56b82SJunchao Zhang #define MUMPS_c zmumps_c 392907cef9SHong Zhang #endif 402907cef9SHong Zhang #else 412907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 423ab56b82SJunchao Zhang #define MUMPS_c smumps_c 432907cef9SHong Zhang #else 443ab56b82SJunchao Zhang #define MUMPS_c dmumps_c 452907cef9SHong Zhang #endif 462907cef9SHong Zhang #endif 
472907cef9SHong Zhang 48a6053eceSJunchao Zhang /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 49a6053eceSJunchao Zhang number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 50a6053eceSJunchao Zhang naming convention in PetscMPIInt, PetscBLASInt etc. 51a6053eceSJunchao Zhang */ 52a6053eceSJunchao Zhang typedef MUMPS_INT PetscMUMPSInt; 53a6053eceSJunchao Zhang 5467602552SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 5567602552SJunchao Zhang #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 56a6053eceSJunchao Zhang #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 5767602552SJunchao Zhang #endif 58a6053eceSJunchao Zhang #else 5967602552SJunchao Zhang #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 6067602552SJunchao Zhang #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 6167602552SJunchao Zhang #endif 6267602552SJunchao Zhang #endif 6367602552SJunchao Zhang 64a6053eceSJunchao Zhang #define MPIU_MUMPSINT MPI_INT 65a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MAX 2147483647 66a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MIN -2147483648 67a6053eceSJunchao Zhang 68a6053eceSJunchao Zhang /* Cast PetscInt to PetscMUMPSInt. 
Usually there is no overflow since <a> is row/col indices or some small integers*/ 69d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 70d71ae5a4SJacob Faibussowitsch { 71a6053eceSJunchao Zhang PetscFunctionBegin; 72ece88022SPierre Jolivet #if PetscDefined(USE_64BIT_INDICES) 732c71b3e2SJacob Faibussowitsch PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 74ece88022SPierre Jolivet #endif 75a6053eceSJunchao Zhang *b = (PetscMUMPSInt)(a); 763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 77a6053eceSJunchao Zhang } 78a6053eceSJunchao Zhang 79a6053eceSJunchao Zhang /* Put these utility routines here since they are only used in this file */ 80d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 81d71ae5a4SJacob Faibussowitsch { 82a6053eceSJunchao Zhang PetscInt myval; 83a6053eceSJunchao Zhang PetscBool myset; 84a6053eceSJunchao Zhang PetscFunctionBegin; 85a6053eceSJunchao Zhang /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 869566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 879566063dSJacob Faibussowitsch if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 88a6053eceSJunchao Zhang if (set) *set = myset; 893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 90a6053eceSJunchao Zhang } 91a6053eceSJunchao Zhang #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 92a6053eceSJunchao Zhang 93217d3b1eSJunchao Zhang /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 943ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 953ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 963ab56b82SJunchao Zhang do { \ 973ab56b82SJunchao Zhang if (mumps->use_petsc_omp_support) { \ 983ab56b82SJunchao Zhang if (mumps->is_omp_master) { \ 999566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 1003ab56b82SJunchao Zhang MUMPS_c(&mumps->id); \ 1019566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 1023ab56b82SJunchao Zhang } \ 1039566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 104c3714a1dSJunchao Zhang /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 105c3714a1dSJunchao Zhang to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 106c3714a1dSJunchao Zhang an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 107c3714a1dSJunchao Zhang omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. 
\ 108c3714a1dSJunchao Zhang */ \ 1099566063dSJacob Faibussowitsch PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 1109566063dSJacob Faibussowitsch PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \ 1119566063dSJacob Faibussowitsch PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 1123ab56b82SJunchao Zhang } else { \ 1133ab56b82SJunchao Zhang MUMPS_c(&mumps->id); \ 1143ab56b82SJunchao Zhang } \ 1153ab56b82SJunchao Zhang } while (0) 1163ab56b82SJunchao Zhang #else 1173ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 118d71ae5a4SJacob Faibussowitsch do { \ 119d71ae5a4SJacob Faibussowitsch MUMPS_c(&mumps->id); \ 120d71ae5a4SJacob Faibussowitsch } while (0) 1213ab56b82SJunchao Zhang #endif 1223ab56b82SJunchao Zhang 123940cd9d6SSatish Balay /* declare MumpsScalar */ 124940cd9d6SSatish Balay #if defined(PETSC_USE_COMPLEX) 125940cd9d6SSatish Balay #if defined(PETSC_USE_REAL_SINGLE) 126940cd9d6SSatish Balay #define MumpsScalar mumps_complex 127940cd9d6SSatish Balay #else 128940cd9d6SSatish Balay #define MumpsScalar mumps_double_complex 129940cd9d6SSatish Balay #endif 130940cd9d6SSatish Balay #else 131940cd9d6SSatish Balay #define MumpsScalar PetscScalar 132940cd9d6SSatish Balay #endif 1333d472b54SHong Zhang 134397b6df1SKris Buschelman /* macros s.t. 
indices match MUMPS documentation */ 135397b6df1SKris Buschelman #define ICNTL(I) icntl[(I)-1] 136397b6df1SKris Buschelman #define CNTL(I) cntl[(I)-1] 137397b6df1SKris Buschelman #define INFOG(I) infog[(I)-1] 138a7aca84bSHong Zhang #define INFO(I) info[(I)-1] 139397b6df1SKris Buschelman #define RINFOG(I) rinfog[(I)-1] 140adc1d99fSHong Zhang #define RINFO(I) rinfo[(I)-1] 141397b6df1SKris Buschelman 142a6053eceSJunchao Zhang typedef struct Mat_MUMPS Mat_MUMPS; 143a6053eceSJunchao Zhang struct Mat_MUMPS { 144397b6df1SKris Buschelman #if defined(PETSC_USE_COMPLEX) 1452907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 1462907cef9SHong Zhang CMUMPS_STRUC_C id; 1472907cef9SHong Zhang #else 148397b6df1SKris Buschelman ZMUMPS_STRUC_C id; 1492907cef9SHong Zhang #endif 1502907cef9SHong Zhang #else 1512907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 1522907cef9SHong Zhang SMUMPS_STRUC_C id; 153397b6df1SKris Buschelman #else 154397b6df1SKris Buschelman DMUMPS_STRUC_C id; 155397b6df1SKris Buschelman #endif 1562907cef9SHong Zhang #endif 1572907cef9SHong Zhang 158397b6df1SKris Buschelman MatStructure matstruc; 1592d4298aeSJunchao Zhang PetscMPIInt myid, petsc_size; 160a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 161a6053eceSJunchao Zhang PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 162a6053eceSJunchao Zhang PetscInt64 nnz; /* number of nonzeros. 
The type is called selective 64-bit in mumps */ 163a6053eceSJunchao Zhang PetscMUMPSInt sym; 1642d4298aeSJunchao Zhang MPI_Comm mumps_comm; 165413bcc21SPierre Jolivet PetscMUMPSInt *ICNTL_pre; 166413bcc21SPierre Jolivet PetscReal *CNTL_pre; 167a6053eceSJunchao Zhang PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 168801fbe65SHong Zhang VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 16925aac85cSJunchao Zhang PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 17067602552SJunchao Zhang PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 17167602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 17267602552SJunchao Zhang PetscInt *rhs_nrow, max_nrhs; 17367602552SJunchao Zhang PetscMPIInt *rhs_recvcounts, *rhs_disps; 17467602552SJunchao Zhang PetscScalar *rhs_loc, *rhs_recvbuf; 17567602552SJunchao Zhang #endif 176801fbe65SHong Zhang Vec b_seq, x_seq; 177a6053eceSJunchao Zhang PetscInt ninfo, *info; /* which INFO to display */ 178b5fa320bSStefano Zampini PetscInt sizeredrhs; 17959ac8732SStefano Zampini PetscScalar *schur_sol; 18059ac8732SStefano Zampini PetscInt schur_sizesol; 181a6053eceSJunchao Zhang PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 182a6053eceSJunchao Zhang PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 183a6053eceSJunchao Zhang PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 1842205254eSKarl Rupp 185*9d0448ceSStefano Zampini /* Support for MATNEST */ 186*9d0448ceSStefano Zampini PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 187*9d0448ceSStefano Zampini PetscInt64 *nest_vals_start; 188*9d0448ceSStefano Zampini PetscScalar *nest_vals; 189*9d0448ceSStefano Zampini 190a6053eceSJunchao Zhang /* stuff used by petsc/mumps OpenMP support*/ 1913ab56b82SJunchao Zhang PetscBool use_petsc_omp_support; 192da81f932SPierre Jolivet PetscOmpCtrl omp_ctrl; /* an 
OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 1933ab56b82SJunchao Zhang MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 194a6053eceSJunchao Zhang PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 195a6053eceSJunchao Zhang PetscMPIInt tag, omp_comm_size; 1963ab56b82SJunchao Zhang PetscBool is_omp_master; /* is this rank the master of omp_comm */ 197a6053eceSJunchao Zhang MPI_Request *reqs; 198a6053eceSJunchao Zhang }; 1993ab56b82SJunchao Zhang 200a6053eceSJunchao Zhang /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 201a6053eceSJunchao Zhang Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 202a6053eceSJunchao Zhang */ 203d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 204d71ae5a4SJacob Faibussowitsch { 205a6053eceSJunchao Zhang PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 206f0c56d0fSKris Buschelman 207a6053eceSJunchao Zhang PetscFunctionBegin; 208a6053eceSJunchao Zhang #if defined(PETSC_USE_64BIT_INDICES) 209a6053eceSJunchao Zhang { 210a6053eceSJunchao Zhang PetscInt i; 211a6053eceSJunchao Zhang if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 2129566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 2139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 214a6053eceSJunchao Zhang mumps->cur_ilen = nrow + 1; 215a6053eceSJunchao Zhang } 216a6053eceSJunchao Zhang if (nnz > mumps->cur_jlen) { 2179566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 2189566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 219a6053eceSJunchao Zhang mumps->cur_jlen = nnz; 220a6053eceSJunchao Zhang } 2219566063dSJacob Faibussowitsch for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 2229566063dSJacob Faibussowitsch for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 223a6053eceSJunchao Zhang *ia_mumps = mumps->ia_alloc; 224a6053eceSJunchao Zhang *ja_mumps = mumps->ja_alloc; 225a6053eceSJunchao Zhang } 226a6053eceSJunchao Zhang #else 227a6053eceSJunchao Zhang *ia_mumps = ia; 228a6053eceSJunchao Zhang *ja_mumps = ja; 229a6053eceSJunchao Zhang #endif 2309566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 2313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 232a6053eceSJunchao Zhang } 233b24902e0SBarry Smith 234d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 235d71ae5a4SJacob Faibussowitsch { 236b5fa320bSStefano Zampini PetscFunctionBegin; 2379566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.listvar_schur)); 2389566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 2399566063dSJacob 
Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 24059ac8732SStefano Zampini mumps->id.size_schur = 0; 241b3cb21ddSStefano Zampini mumps->id.schur_lld = 0; 24259ac8732SStefano Zampini mumps->id.ICNTL(19) = 0; 2433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24459ac8732SStefano Zampini } 24559ac8732SStefano Zampini 246b3cb21ddSStefano Zampini /* solve with rhs in mumps->id.redrhs and return in the same location */ 247d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 248d71ae5a4SJacob Faibussowitsch { 249b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 250b3cb21ddSStefano Zampini Mat S, B, X; 251b3cb21ddSStefano Zampini MatFactorSchurStatus schurstatus; 252b3cb21ddSStefano Zampini PetscInt sizesol; 25359ac8732SStefano Zampini 25459ac8732SStefano Zampini PetscFunctionBegin; 2559566063dSJacob Faibussowitsch PetscCall(MatFactorFactorizeSchurComplement(F)); 2569566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 2579566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 2589566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 259a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2609566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(B, S->boundtocpu)); 261a3d589ffSStefano Zampini #endif 262b3cb21ddSStefano Zampini switch (schurstatus) { 263d71ae5a4SJacob Faibussowitsch case MAT_FACTOR_SCHUR_FACTORED: 264d71ae5a4SJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 265d71ae5a4SJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 266a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2679566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, 
S->boundtocpu)); 268a3d589ffSStefano Zampini #endif 269b3cb21ddSStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 2709566063dSJacob Faibussowitsch PetscCall(MatMatSolveTranspose(S, B, X)); 27159ac8732SStefano Zampini } else { 2729566063dSJacob Faibussowitsch PetscCall(MatMatSolve(S, B, X)); 27359ac8732SStefano Zampini } 274b3cb21ddSStefano Zampini break; 275b3cb21ddSStefano Zampini case MAT_FACTOR_SCHUR_INVERTED: 276b3cb21ddSStefano Zampini sizesol = mumps->id.nrhs * mumps->id.size_schur; 27759ac8732SStefano Zampini if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 2789566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 2799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 28059ac8732SStefano Zampini mumps->schur_sizesol = sizesol; 281b5fa320bSStefano Zampini } 2829566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 2839566063dSJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 284a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2859566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, S->boundtocpu)); 286a3d589ffSStefano Zampini #endif 2879566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 28859ac8732SStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 2899566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 290b5fa320bSStefano Zampini } else { 2919566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 292b5fa320bSStefano Zampini } 2939566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(X)); 2949566063dSJacob Faibussowitsch PetscCall(MatProductSymbolic(X)); 2959566063dSJacob Faibussowitsch PetscCall(MatProductNumeric(X)); 2964417c5e8SHong Zhang 2979566063dSJacob Faibussowitsch PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 298b3cb21ddSStefano 
Zampini break; 299d71ae5a4SJacob Faibussowitsch default: 300d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 30159ac8732SStefano Zampini } 3029566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 3039566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 3049566063dSJacob Faibussowitsch PetscCall(MatDestroy(&X)); 3053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 306b5fa320bSStefano Zampini } 307b5fa320bSStefano Zampini 308d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 309d71ae5a4SJacob Faibussowitsch { 310b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 311b5fa320bSStefano Zampini 312b5fa320bSStefano Zampini PetscFunctionBegin; 313b5fa320bSStefano Zampini if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 3143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 315b5fa320bSStefano Zampini } 316b8f61ee1SStefano Zampini if (!expansion) { /* prepare for the condensation step */ 317b5fa320bSStefano Zampini PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 318b5fa320bSStefano Zampini /* allocate MUMPS internal array to store reduced right-hand sides */ 319b5fa320bSStefano Zampini if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 3209566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 321b5fa320bSStefano Zampini mumps->id.lredrhs = mumps->id.size_schur; 3229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 323b5fa320bSStefano Zampini mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 324b5fa320bSStefano Zampini } 325b5fa320bSStefano Zampini mumps->id.ICNTL(26) = 1; /* condensation phase */ 326b5fa320bSStefano Zampini } else { /* prepare for the expansion step */ 327b8f61ee1SStefano Zampini /* 
solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 3289566063dSJacob Faibussowitsch PetscCall(MatMumpsSolveSchur_Private(F)); 329b5fa320bSStefano Zampini mumps->id.ICNTL(26) = 2; /* expansion phase */ 3303ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 33108401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 332b5fa320bSStefano Zampini /* restore defaults */ 333b5fa320bSStefano Zampini mumps->id.ICNTL(26) = -1; 334d3d598ffSStefano Zampini /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 335d3d598ffSStefano Zampini if (mumps->id.nrhs > 1) { 3369566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 337d3d598ffSStefano Zampini mumps->id.lredrhs = 0; 338d3d598ffSStefano Zampini mumps->sizeredrhs = 0; 339d3d598ffSStefano Zampini } 340b5fa320bSStefano Zampini } 3413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 342b5fa320bSStefano Zampini } 343b5fa320bSStefano Zampini 344397b6df1SKris Buschelman /* 345d341cd04SHong Zhang MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 346d341cd04SHong Zhang 347397b6df1SKris Buschelman input: 34875480915SPierre Jolivet A - matrix in aij,baij or sbaij format 349397b6df1SKris Buschelman shift - 0: C style output triple; 1: Fortran style output triple. 
350bccb9932SShri Abhyankar reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 351bccb9932SShri Abhyankar MAT_REUSE_MATRIX: only the values in v array are updated 352397b6df1SKris Buschelman output: 353397b6df1SKris Buschelman nnz - dim of r, c, and v (number of local nonzero entries of A) 354397b6df1SKris Buschelman r, c, v - row and col index, matrix values (matrix triples) 355eb9baa12SBarry Smith 356eb9baa12SBarry Smith The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 3577ee00b23SStefano Zampini freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 358eb9baa12SBarry Smith that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 359eb9baa12SBarry Smith 360397b6df1SKris Buschelman */ 36116ebf90aSShri Abhyankar 362d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 363d71ae5a4SJacob Faibussowitsch { 364a3d589ffSStefano Zampini const PetscScalar *av; 365185f6596SHong Zhang const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 366a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j, k; 367a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 36816ebf90aSShri Abhyankar Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 369397b6df1SKris Buschelman 370397b6df1SKris Buschelman PetscFunctionBegin; 3719566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 372a6053eceSJunchao Zhang mumps->val = (PetscScalar *)av; 373bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 3742205254eSKarl Rupp nz = aa->nz; 3752205254eSKarl Rupp ai = aa->i; 3762205254eSKarl Rupp aj = aa->j; 3779566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 378a6053eceSJunchao Zhang for (i = k = 0; i < M; i++) { 37916ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 38067877ebaSShri Abhyankar ajj = aj + ai[i]; 38167877ebaSShri Abhyankar 
for (j = 0; j < rnz; j++) { 3829566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 3839566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 384a6053eceSJunchao Zhang k++; 38516ebf90aSShri Abhyankar } 38616ebf90aSShri Abhyankar } 387a6053eceSJunchao Zhang mumps->irn = row; 388a6053eceSJunchao Zhang mumps->jcn = col; 389a6053eceSJunchao Zhang mumps->nnz = nz; 39016ebf90aSShri Abhyankar } 3919566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 3923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 39316ebf90aSShri Abhyankar } 394397b6df1SKris Buschelman 395d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 396d71ae5a4SJacob Faibussowitsch { 397a6053eceSJunchao Zhang PetscInt64 nz, i, j, k, r; 3987ee00b23SStefano Zampini Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 399a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 4007ee00b23SStefano Zampini 4017ee00b23SStefano Zampini PetscFunctionBegin; 402a6053eceSJunchao Zhang mumps->val = a->val; 4037ee00b23SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 4047ee00b23SStefano Zampini nz = a->sliidx[a->totalslices]; 4059566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 406a6053eceSJunchao Zhang for (i = k = 0; i < a->totalslices; i++) { 40748a46eb9SPierre Jolivet for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 4087ee00b23SStefano Zampini } 4099566063dSJacob Faibussowitsch for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 410a6053eceSJunchao Zhang mumps->irn = row; 411a6053eceSJunchao Zhang mumps->jcn = col; 412a6053eceSJunchao Zhang mumps->nnz = nz; 4137ee00b23SStefano Zampini } 4143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4157ee00b23SStefano Zampini } 4167ee00b23SStefano Zampini 
417d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 418d71ae5a4SJacob Faibussowitsch { 41967877ebaSShri Abhyankar Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 42033d57670SJed Brown const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 421a6053eceSJunchao Zhang PetscInt64 M, nz, idx = 0, rnz, i, j, k, m; 422a6053eceSJunchao Zhang PetscInt bs; 423a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 42467877ebaSShri Abhyankar 42567877ebaSShri Abhyankar PetscFunctionBegin; 4269566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 42733d57670SJed Brown M = A->rmap->N / bs; 428a6053eceSJunchao Zhang mumps->val = aa->a; 429bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 4309371c9d4SSatish Balay ai = aa->i; 4319371c9d4SSatish Balay aj = aa->j; 43267877ebaSShri Abhyankar nz = bs2 * aa->nz; 4339566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 43467877ebaSShri Abhyankar for (i = 0; i < M; i++) { 43567877ebaSShri Abhyankar ajj = aj + ai[i]; 43667877ebaSShri Abhyankar rnz = ai[i + 1] - ai[i]; 43767877ebaSShri Abhyankar for (k = 0; k < rnz; k++) { 43867877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 43967877ebaSShri Abhyankar for (m = 0; m < bs; m++) { 4409566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 4419566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 442a6053eceSJunchao Zhang idx++; 44367877ebaSShri Abhyankar } 44467877ebaSShri Abhyankar } 44567877ebaSShri Abhyankar } 44667877ebaSShri Abhyankar } 447a6053eceSJunchao Zhang mumps->irn = row; 448a6053eceSJunchao Zhang mumps->jcn = col; 449a6053eceSJunchao Zhang mumps->nnz = nz; 45067877ebaSShri Abhyankar } 4513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 45267877ebaSShri Abhyankar } 45367877ebaSShri Abhyankar 454d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, 
PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 455d71ae5a4SJacob Faibussowitsch { 45675480915SPierre Jolivet const PetscInt *ai, *aj, *ajj; 457a6053eceSJunchao Zhang PetscInt bs; 458a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j, k, m; 459a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 46075480915SPierre Jolivet PetscScalar *val; 46116ebf90aSShri Abhyankar Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 46275480915SPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 46338548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 464b94d7dedSBarry Smith PetscBool isset, hermitian; 46538548759SBarry Smith #endif 46616ebf90aSShri Abhyankar 46716ebf90aSShri Abhyankar PetscFunctionBegin; 46838548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 469b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 470b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 47138548759SBarry Smith #endif 4722205254eSKarl Rupp ai = aa->i; 4732205254eSKarl Rupp aj = aa->j; 4749566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 47575480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 476f3fa974cSJacob Faibussowitsch const PetscInt64 alloc_size = aa->nz * bs2; 477f3fa974cSJacob Faibussowitsch 478f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 479a6053eceSJunchao Zhang if (bs > 1) { 480f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 481a6053eceSJunchao Zhang mumps->val = mumps->val_alloc; 48275480915SPierre Jolivet } else { 483a6053eceSJunchao Zhang mumps->val = aa->a; 48475480915SPierre Jolivet } 485a6053eceSJunchao Zhang mumps->irn = row; 486a6053eceSJunchao Zhang mumps->jcn = col; 487a6053eceSJunchao Zhang } else { 488a6053eceSJunchao Zhang if (bs == 1) mumps->val = aa->a; 489a6053eceSJunchao Zhang row = mumps->irn; 490a6053eceSJunchao Zhang col = 
mumps->jcn; 491a6053eceSJunchao Zhang } 492a6053eceSJunchao Zhang val = mumps->val; 493185f6596SHong Zhang 49416ebf90aSShri Abhyankar nz = 0; 495a81fe166SPierre Jolivet if (bs > 1) { 49675480915SPierre Jolivet for (i = 0; i < mbs; i++) { 49716ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 49867877ebaSShri Abhyankar ajj = aj + ai[i]; 49975480915SPierre Jolivet for (j = 0; j < rnz; j++) { 50075480915SPierre Jolivet for (k = 0; k < bs; k++) { 50175480915SPierre Jolivet for (m = 0; m < bs; m++) { 502ec4f40fdSPierre Jolivet if (ajj[j] > i || k >= m) { 50375480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 5049566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 5059566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 50675480915SPierre Jolivet } 50775480915SPierre Jolivet val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 50875480915SPierre Jolivet } 50975480915SPierre Jolivet } 51075480915SPierre Jolivet } 51175480915SPierre Jolivet } 51275480915SPierre Jolivet } 513a81fe166SPierre Jolivet } else if (reuse == MAT_INITIAL_MATRIX) { 514a81fe166SPierre Jolivet for (i = 0; i < mbs; i++) { 515a81fe166SPierre Jolivet rnz = ai[i + 1] - ai[i]; 516a81fe166SPierre Jolivet ajj = aj + ai[i]; 517a81fe166SPierre Jolivet for (j = 0; j < rnz; j++) { 5189566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 5199566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 520a6053eceSJunchao Zhang nz++; 521a81fe166SPierre Jolivet } 522a81fe166SPierre Jolivet } 52308401ef6SPierre Jolivet PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 52475480915SPierre Jolivet } 525a6053eceSJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 5263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 52716ebf90aSShri Abhyankar } 52816ebf90aSShri Abhyankar 
529d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 530d71ae5a4SJacob Faibussowitsch { 53167877ebaSShri Abhyankar const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 532a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j; 53367877ebaSShri Abhyankar const PetscScalar *av, *v1; 53416ebf90aSShri Abhyankar PetscScalar *val; 535a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 536829b1710SHong Zhang Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 53729b521d4Sstefano_zampini PetscBool missing; 53838548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 539b94d7dedSBarry Smith PetscBool hermitian, isset; 54038548759SBarry Smith #endif 54116ebf90aSShri Abhyankar 54216ebf90aSShri Abhyankar PetscFunctionBegin; 54338548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 544b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 545b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 54638548759SBarry Smith #endif 5479566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 5489371c9d4SSatish Balay ai = aa->i; 5499371c9d4SSatish Balay aj = aa->j; 55016ebf90aSShri Abhyankar adiag = aa->diag; 5519566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 552bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 5537ee00b23SStefano Zampini /* count nz in the upper triangular part of A */ 554829b1710SHong Zhang nz = 0; 55529b521d4Sstefano_zampini if (missing) { 55629b521d4Sstefano_zampini for (i = 0; i < M; i++) { 55729b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 55829b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 55929b521d4Sstefano_zampini if (aj[j] < i) continue; 56029b521d4Sstefano_zampini nz++; 56129b521d4Sstefano_zampini } 56229b521d4Sstefano_zampini } else { 56329b521d4Sstefano_zampini nz += 
ai[i + 1] - adiag[i]; 56429b521d4Sstefano_zampini } 56529b521d4Sstefano_zampini } 56629b521d4Sstefano_zampini } else { 567829b1710SHong Zhang for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 56829b521d4Sstefano_zampini } 5699566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 5709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 571a6053eceSJunchao Zhang mumps->nnz = nz; 572a6053eceSJunchao Zhang mumps->irn = row; 573a6053eceSJunchao Zhang mumps->jcn = col; 574a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 575185f6596SHong Zhang 57616ebf90aSShri Abhyankar nz = 0; 57729b521d4Sstefano_zampini if (missing) { 57829b521d4Sstefano_zampini for (i = 0; i < M; i++) { 57929b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 58029b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 58129b521d4Sstefano_zampini if (aj[j] < i) continue; 5829566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 5839566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 58429b521d4Sstefano_zampini val[nz] = av[j]; 58529b521d4Sstefano_zampini nz++; 58629b521d4Sstefano_zampini } 58729b521d4Sstefano_zampini } else { 58829b521d4Sstefano_zampini rnz = ai[i + 1] - adiag[i]; 58929b521d4Sstefano_zampini ajj = aj + adiag[i]; 59029b521d4Sstefano_zampini v1 = av + adiag[i]; 59129b521d4Sstefano_zampini for (j = 0; j < rnz; j++) { 5929566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 5939566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 594a6053eceSJunchao Zhang val[nz++] = v1[j]; 59529b521d4Sstefano_zampini } 59629b521d4Sstefano_zampini } 59729b521d4Sstefano_zampini } 59829b521d4Sstefano_zampini } else { 59916ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 60016ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 60167877ebaSShri Abhyankar ajj = aj + adiag[i]; 602cf3759fdSShri Abhyankar v1 = av + adiag[i]; 
60367877ebaSShri Abhyankar for (j = 0; j < rnz; j++) { 6049566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 6059566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 606a6053eceSJunchao Zhang val[nz++] = v1[j]; 60716ebf90aSShri Abhyankar } 60816ebf90aSShri Abhyankar } 60929b521d4Sstefano_zampini } 610397b6df1SKris Buschelman } else { 611a6053eceSJunchao Zhang nz = 0; 612a6053eceSJunchao Zhang val = mumps->val; 61329b521d4Sstefano_zampini if (missing) { 61416ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 61529b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 61629b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 61729b521d4Sstefano_zampini if (aj[j] < i) continue; 61829b521d4Sstefano_zampini val[nz++] = av[j]; 61929b521d4Sstefano_zampini } 62029b521d4Sstefano_zampini } else { 62116ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 62267877ebaSShri Abhyankar v1 = av + adiag[i]; 623ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 62416ebf90aSShri Abhyankar } 62516ebf90aSShri Abhyankar } 62629b521d4Sstefano_zampini } else { 62716ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 62816ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 62916ebf90aSShri Abhyankar v1 = av + adiag[i]; 630ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 63116ebf90aSShri Abhyankar } 63216ebf90aSShri Abhyankar } 63329b521d4Sstefano_zampini } 6349566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 6353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 63616ebf90aSShri Abhyankar } 63716ebf90aSShri Abhyankar 638d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 639d71ae5a4SJacob Faibussowitsch { 640a6053eceSJunchao Zhang const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 641a6053eceSJunchao Zhang PetscInt bs; 642a6053eceSJunchao Zhang 
PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 643a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 64416ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 64516ebf90aSShri Abhyankar PetscScalar *val; 646397b6df1SKris Buschelman Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 647397b6df1SKris Buschelman Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 648397b6df1SKris Buschelman Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 649ec4f40fdSPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 65038548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 651b94d7dedSBarry Smith PetscBool hermitian, isset; 65238548759SBarry Smith #endif 65316ebf90aSShri Abhyankar 65416ebf90aSShri Abhyankar PetscFunctionBegin; 65538548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 656b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 657b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 65838548759SBarry Smith #endif 6599566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 66038548759SBarry Smith rstart = A->rmap->rstart; 66138548759SBarry Smith ai = aa->i; 66238548759SBarry Smith aj = aa->j; 66338548759SBarry Smith bi = bb->i; 66438548759SBarry Smith bj = bb->j; 66538548759SBarry Smith av = aa->a; 66638548759SBarry Smith bv = bb->a; 667397b6df1SKris Buschelman 6682205254eSKarl Rupp garray = mat->garray; 6692205254eSKarl Rupp 670bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 671a6053eceSJunchao Zhang nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 6729566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 6739566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 674a6053eceSJunchao Zhang /* can not decide the exact mumps->nnz now because of the SBAIJ */ 675a6053eceSJunchao Zhang mumps->irn = row; 676a6053eceSJunchao Zhang mumps->jcn = col; 
677a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 678397b6df1SKris Buschelman } else { 679a6053eceSJunchao Zhang val = mumps->val; 680397b6df1SKris Buschelman } 681397b6df1SKris Buschelman 6829371c9d4SSatish Balay jj = 0; 6839371c9d4SSatish Balay irow = rstart; 684ec4f40fdSPierre Jolivet for (i = 0; i < mbs; i++) { 685397b6df1SKris Buschelman ajj = aj + ai[i]; /* ptr to the beginning of this row */ 686397b6df1SKris Buschelman countA = ai[i + 1] - ai[i]; 687397b6df1SKris Buschelman countB = bi[i + 1] - bi[i]; 688397b6df1SKris Buschelman bjj = bj + bi[i]; 689ec4f40fdSPierre Jolivet v1 = av + ai[i] * bs2; 690ec4f40fdSPierre Jolivet v2 = bv + bi[i] * bs2; 691397b6df1SKris Buschelman 692ec4f40fdSPierre Jolivet if (bs > 1) { 693ec4f40fdSPierre Jolivet /* A-part */ 694ec4f40fdSPierre Jolivet for (j = 0; j < countA; j++) { 695ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 696ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 697ec4f40fdSPierre Jolivet if (rstart + ajj[j] * bs > irow || k >= m) { 698ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 6999566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 7009566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 701ec4f40fdSPierre Jolivet } 702ec4f40fdSPierre Jolivet val[jj++] = v1[j * bs2 + m + k * bs]; 703ec4f40fdSPierre Jolivet } 704ec4f40fdSPierre Jolivet } 705ec4f40fdSPierre Jolivet } 706ec4f40fdSPierre Jolivet } 707ec4f40fdSPierre Jolivet 708ec4f40fdSPierre Jolivet /* B-part */ 709ec4f40fdSPierre Jolivet for (j = 0; j < countB; j++) { 710ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 711ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 712ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 7139566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 7149566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 
715ec4f40fdSPierre Jolivet } 716ec4f40fdSPierre Jolivet val[jj++] = v2[j * bs2 + m + k * bs]; 717ec4f40fdSPierre Jolivet } 718ec4f40fdSPierre Jolivet } 719ec4f40fdSPierre Jolivet } 720ec4f40fdSPierre Jolivet } else { 721397b6df1SKris Buschelman /* A-part */ 722397b6df1SKris Buschelman for (j = 0; j < countA; j++) { 723bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7249566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 7259566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 726397b6df1SKris Buschelman } 72716ebf90aSShri Abhyankar val[jj++] = v1[j]; 728397b6df1SKris Buschelman } 72916ebf90aSShri Abhyankar 73016ebf90aSShri Abhyankar /* B-part */ 73116ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 732bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7339566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 7349566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 735397b6df1SKris Buschelman } 73616ebf90aSShri Abhyankar val[jj++] = v2[j]; 73716ebf90aSShri Abhyankar } 73816ebf90aSShri Abhyankar } 739ec4f40fdSPierre Jolivet irow += bs; 740ec4f40fdSPierre Jolivet } 741a6053eceSJunchao Zhang mumps->nnz = jj; 7423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 74316ebf90aSShri Abhyankar } 74416ebf90aSShri Abhyankar 745d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 746d71ae5a4SJacob Faibussowitsch { 74716ebf90aSShri Abhyankar const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 748a6053eceSJunchao Zhang PetscInt64 rstart, nz, i, j, jj, irow, countA, countB; 749a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 75016ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 75116ebf90aSShri Abhyankar PetscScalar *val; 752a3d589ffSStefano Zampini Mat Ad, Ao; 753a3d589ffSStefano Zampini 
Mat_SeqAIJ *aa; 754a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 75516ebf90aSShri Abhyankar 75616ebf90aSShri Abhyankar PetscFunctionBegin; 7579566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 7589566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 7599566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 760a3d589ffSStefano Zampini 761a3d589ffSStefano Zampini aa = (Mat_SeqAIJ *)(Ad)->data; 762a3d589ffSStefano Zampini bb = (Mat_SeqAIJ *)(Ao)->data; 76338548759SBarry Smith ai = aa->i; 76438548759SBarry Smith aj = aa->j; 76538548759SBarry Smith bi = bb->i; 76638548759SBarry Smith bj = bb->j; 76716ebf90aSShri Abhyankar 768a3d589ffSStefano Zampini rstart = A->rmap->rstart; 7692205254eSKarl Rupp 770bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 771a6053eceSJunchao Zhang nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 7729566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 7739566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 774a6053eceSJunchao Zhang mumps->nnz = nz; 775a6053eceSJunchao Zhang mumps->irn = row; 776a6053eceSJunchao Zhang mumps->jcn = col; 777a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 77816ebf90aSShri Abhyankar } else { 779a6053eceSJunchao Zhang val = mumps->val; 78016ebf90aSShri Abhyankar } 78116ebf90aSShri Abhyankar 7829371c9d4SSatish Balay jj = 0; 7839371c9d4SSatish Balay irow = rstart; 78416ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 78516ebf90aSShri Abhyankar ajj = aj + ai[i]; /* ptr to the beginning of this row */ 78616ebf90aSShri Abhyankar countA = ai[i + 1] - ai[i]; 78716ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 78816ebf90aSShri Abhyankar bjj = bj + bi[i]; 78916ebf90aSShri Abhyankar v1 = av + ai[i]; 79016ebf90aSShri Abhyankar v2 = bv + bi[i]; 79116ebf90aSShri Abhyankar 79216ebf90aSShri Abhyankar /* A-part */ 79316ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 
794bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7959566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 7969566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 79716ebf90aSShri Abhyankar } 79816ebf90aSShri Abhyankar val[jj++] = v1[j]; 79916ebf90aSShri Abhyankar } 80016ebf90aSShri Abhyankar 80116ebf90aSShri Abhyankar /* B-part */ 80216ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 803bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8049566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 8059566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 80616ebf90aSShri Abhyankar } 80716ebf90aSShri Abhyankar val[jj++] = v2[j]; 80816ebf90aSShri Abhyankar } 80916ebf90aSShri Abhyankar irow++; 81016ebf90aSShri Abhyankar } 8119566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 8129566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 8133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 81416ebf90aSShri Abhyankar } 81516ebf90aSShri Abhyankar 816d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 817d71ae5a4SJacob Faibussowitsch { 81867877ebaSShri Abhyankar Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 81967877ebaSShri Abhyankar Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 82067877ebaSShri Abhyankar Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 82167877ebaSShri Abhyankar const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 822d985c460SShri Abhyankar const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart; 82333d57670SJed Brown const PetscInt bs2 = mat->bs2; 824a6053eceSJunchao Zhang PetscInt bs; 825a6053eceSJunchao Zhang PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 826a6053eceSJunchao Zhang 
PetscMUMPSInt *row, *col; 82767877ebaSShri Abhyankar const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 82867877ebaSShri Abhyankar PetscScalar *val; 82967877ebaSShri Abhyankar 83067877ebaSShri Abhyankar PetscFunctionBegin; 8319566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 832bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 83367877ebaSShri Abhyankar nz = bs2 * (aa->nz + bb->nz); 8349566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 8359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 836a6053eceSJunchao Zhang mumps->nnz = nz; 837a6053eceSJunchao Zhang mumps->irn = row; 838a6053eceSJunchao Zhang mumps->jcn = col; 839a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 84067877ebaSShri Abhyankar } else { 841a6053eceSJunchao Zhang val = mumps->val; 84267877ebaSShri Abhyankar } 84367877ebaSShri Abhyankar 8449371c9d4SSatish Balay jj = 0; 8459371c9d4SSatish Balay irow = rstart; 84667877ebaSShri Abhyankar for (i = 0; i < mbs; i++) { 84767877ebaSShri Abhyankar countA = ai[i + 1] - ai[i]; 84867877ebaSShri Abhyankar countB = bi[i + 1] - bi[i]; 84967877ebaSShri Abhyankar ajj = aj + ai[i]; 85067877ebaSShri Abhyankar bjj = bj + bi[i]; 85167877ebaSShri Abhyankar v1 = av + bs2 * ai[i]; 85267877ebaSShri Abhyankar v2 = bv + bs2 * bi[i]; 85367877ebaSShri Abhyankar 85467877ebaSShri Abhyankar idx = 0; 85567877ebaSShri Abhyankar /* A-part */ 85667877ebaSShri Abhyankar for (k = 0; k < countA; k++) { 85767877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 85867877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 859bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8609566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 8619566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj])); 86267877ebaSShri Abhyankar } 86367877ebaSShri Abhyankar val[jj++] = v1[idx++]; 86467877ebaSShri Abhyankar } 86567877ebaSShri Abhyankar } 
86667877ebaSShri Abhyankar } 86767877ebaSShri Abhyankar 86867877ebaSShri Abhyankar idx = 0; 86967877ebaSShri Abhyankar /* B-part */ 87067877ebaSShri Abhyankar for (k = 0; k < countB; k++) { 87167877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 87267877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 873bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8749566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 8759566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 87667877ebaSShri Abhyankar } 877d985c460SShri Abhyankar val[jj++] = v2[idx++]; 87867877ebaSShri Abhyankar } 87967877ebaSShri Abhyankar } 88067877ebaSShri Abhyankar } 881d985c460SShri Abhyankar irow += bs; 88267877ebaSShri Abhyankar } 8833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 88467877ebaSShri Abhyankar } 88567877ebaSShri Abhyankar 886d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 887d71ae5a4SJacob Faibussowitsch { 88816ebf90aSShri Abhyankar const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 889a6053eceSJunchao Zhang PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 890a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 89116ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 89216ebf90aSShri Abhyankar PetscScalar *val; 893a3d589ffSStefano Zampini Mat Ad, Ao; 894a3d589ffSStefano Zampini Mat_SeqAIJ *aa; 895a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 89638548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 897b94d7dedSBarry Smith PetscBool hermitian, isset; 89838548759SBarry Smith #endif 89916ebf90aSShri Abhyankar 90016ebf90aSShri Abhyankar PetscFunctionBegin; 90138548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 902b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 903b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, 
PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 90438548759SBarry Smith #endif 9059566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 9069566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 9079566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 908a3d589ffSStefano Zampini 909a3d589ffSStefano Zampini aa = (Mat_SeqAIJ *)(Ad)->data; 910a3d589ffSStefano Zampini bb = (Mat_SeqAIJ *)(Ao)->data; 91138548759SBarry Smith ai = aa->i; 91238548759SBarry Smith aj = aa->j; 91338548759SBarry Smith adiag = aa->diag; 91438548759SBarry Smith bi = bb->i; 91538548759SBarry Smith bj = bb->j; 9162205254eSKarl Rupp 91716ebf90aSShri Abhyankar rstart = A->rmap->rstart; 91816ebf90aSShri Abhyankar 919bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 920e0bace9bSHong Zhang nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 921e0bace9bSHong Zhang nzb = 0; /* num of upper triangular entries in mat->B */ 92216ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 923e0bace9bSHong Zhang nza += (ai[i + 1] - adiag[i]); 92416ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 92516ebf90aSShri Abhyankar bjj = bj + bi[i]; 926e0bace9bSHong Zhang for (j = 0; j < countB; j++) { 927e0bace9bSHong Zhang if (garray[bjj[j]] > rstart) nzb++; 928e0bace9bSHong Zhang } 929e0bace9bSHong Zhang } 93016ebf90aSShri Abhyankar 931e0bace9bSHong Zhang nz = nza + nzb; /* total nz of upper triangular part of mat */ 9329566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 9339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 934a6053eceSJunchao Zhang mumps->nnz = nz; 935a6053eceSJunchao Zhang mumps->irn = row; 936a6053eceSJunchao Zhang mumps->jcn = col; 937a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 93816ebf90aSShri Abhyankar } else { 939a6053eceSJunchao Zhang val = mumps->val; 94016ebf90aSShri Abhyankar } 
94116ebf90aSShri Abhyankar 9429371c9d4SSatish Balay jj = 0; 9439371c9d4SSatish Balay irow = rstart; 94416ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 94516ebf90aSShri Abhyankar ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 94616ebf90aSShri Abhyankar v1 = av + adiag[i]; 94716ebf90aSShri Abhyankar countA = ai[i + 1] - adiag[i]; 94816ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 94916ebf90aSShri Abhyankar bjj = bj + bi[i]; 95016ebf90aSShri Abhyankar v2 = bv + bi[i]; 95116ebf90aSShri Abhyankar 95216ebf90aSShri Abhyankar /* A-part */ 95316ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 954bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 9559566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 9569566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 95716ebf90aSShri Abhyankar } 95816ebf90aSShri Abhyankar val[jj++] = v1[j]; 95916ebf90aSShri Abhyankar } 96016ebf90aSShri Abhyankar 96116ebf90aSShri Abhyankar /* B-part */ 96216ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 96316ebf90aSShri Abhyankar if (garray[bjj[j]] > rstart) { 964bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 9659566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 9669566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 96716ebf90aSShri Abhyankar } 96816ebf90aSShri Abhyankar val[jj++] = v2[j]; 96916ebf90aSShri Abhyankar } 970397b6df1SKris Buschelman } 971397b6df1SKris Buschelman irow++; 972397b6df1SKris Buschelman } 9739566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 9749566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 9753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 976397b6df1SKris Buschelman } 977397b6df1SKris Buschelman 978*9d0448ceSStefano Zampini PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, 
MatReuse reuse, Mat_MUMPS *mumps) 979*9d0448ceSStefano Zampini { 980*9d0448ceSStefano Zampini Mat **mats; 981*9d0448ceSStefano Zampini PetscInt nr, nc; 982*9d0448ceSStefano Zampini PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 983*9d0448ceSStefano Zampini 984*9d0448ceSStefano Zampini PetscFunctionBegin; 985*9d0448ceSStefano Zampini PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 986*9d0448ceSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 987*9d0448ceSStefano Zampini PetscMUMPSInt *irns, *jcns; 988*9d0448ceSStefano Zampini PetscScalar *vals; 989*9d0448ceSStefano Zampini PetscInt64 totnnz, cumnnz, maxnnz; 990*9d0448ceSStefano Zampini PetscInt *pjcns_w; 991*9d0448ceSStefano Zampini IS *rows, *cols; 992*9d0448ceSStefano Zampini PetscInt **rows_idx, **cols_idx; 993*9d0448ceSStefano Zampini 994*9d0448ceSStefano Zampini cumnnz = 0; 995*9d0448ceSStefano Zampini maxnnz = 0; 996*9d0448ceSStefano Zampini PetscCall(PetscMalloc2(nr * nc, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 997*9d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 998*9d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 999*9d0448ceSStefano Zampini Mat sub = mats[r][c]; 1000*9d0448ceSStefano Zampini 1001*9d0448ceSStefano Zampini mumps->nest_convert_to_triples[r * nc + c] = NULL; 1002*9d0448ceSStefano Zampini if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1003*9d0448ceSStefano Zampini if (sub) { 1004*9d0448ceSStefano Zampini PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1005*9d0448ceSStefano Zampini PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ; 1006*9d0448ceSStefano Zampini MatInfo info; 1007*9d0448ceSStefano Zampini 1008*9d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1009*9d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1010*9d0448ceSStefano 
Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1011*9d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1012*9d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1013*9d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1014*9d0448ceSStefano Zampini 1015*9d0448ceSStefano Zampini if (chol) { 1016*9d0448ceSStefano Zampini if (r == c) { 1017*9d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1018*9d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1019*9d0448ceSStefano Zampini else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1020*9d0448ceSStefano Zampini else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1021*9d0448ceSStefano Zampini } else { 1022*9d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1023*9d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1024*9d0448ceSStefano Zampini else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1025*9d0448ceSStefano Zampini else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1026*9d0448ceSStefano Zampini } 1027*9d0448ceSStefano Zampini } else { 1028*9d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1029*9d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1030*9d0448ceSStefano Zampini else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1031*9d0448ceSStefano Zampini else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1032*9d0448ceSStefano Zampini } 1033*9d0448ceSStefano Zampini 
PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1034*9d0448ceSStefano Zampini mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1035*9d0448ceSStefano Zampini PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1036*9d0448ceSStefano Zampini cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 1037*9d0448ceSStefano Zampini maxnnz = PetscMax(maxnnz, info.nz_used); 1038*9d0448ceSStefano Zampini } 1039*9d0448ceSStefano Zampini } 1040*9d0448ceSStefano Zampini } 1041*9d0448ceSStefano Zampini 1042*9d0448ceSStefano Zampini /* Allocate total COO */ 1043*9d0448ceSStefano Zampini totnnz = cumnnz; 1044*9d0448ceSStefano Zampini PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1045*9d0448ceSStefano Zampini PetscCall(PetscMalloc1(totnnz, &vals)); 1046*9d0448ceSStefano Zampini 1047*9d0448ceSStefano Zampini /* Handle rows and column maps 1048*9d0448ceSStefano Zampini We directly map rows and use an SF for the columns */ 1049*9d0448ceSStefano Zampini PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1050*9d0448ceSStefano Zampini PetscCall(MatNestGetISs(A, rows, cols)); 1051*9d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1052*9d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1053*9d0448ceSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1054*9d0448ceSStefano Zampini else (void)(maxnnz); 1055*9d0448ceSStefano Zampini 1056*9d0448ceSStefano Zampini cumnnz = 0; 1057*9d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 1058*9d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 1059*9d0448ceSStefano Zampini Mat sub = mats[r][c]; 1060*9d0448ceSStefano Zampini const PetscInt *ridx = rows_idx[r]; 1061*9d0448ceSStefano Zampini 
PetscInt rst; 1062*9d0448ceSStefano Zampini PetscSF csf; 1063*9d0448ceSStefano Zampini 1064*9d0448ceSStefano Zampini mumps->nest_vals_start[r * nc + c] = cumnnz; 1065*9d0448ceSStefano Zampini if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1066*9d0448ceSStefano Zampini 1067*9d0448ceSStefano Zampini /* Directly use the mumps datastructure and use C ordering for now */ 1068*9d0448ceSStefano Zampini PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1069*9d0448ceSStefano Zampini 1070*9d0448ceSStefano Zampini /* Import values to full COO */ 1071*9d0448ceSStefano Zampini PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1072*9d0448ceSStefano Zampini 1073*9d0448ceSStefano Zampini /* Direct map of rows */ 1074*9d0448ceSStefano Zampini PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1075*9d0448ceSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1076*9d0448ceSStefano Zampini 1077*9d0448ceSStefano Zampini /* Communicate column indices */ 1078*9d0448ceSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) { 1079*9d0448ceSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1080*9d0448ceSStefano Zampini } else { 1081*9d0448ceSStefano Zampini pjcns_w = (PetscInt *)(jcns + cumnnz); /* This cast is needed only to silence warnings for 64bit integers builds */ 1082*9d0448ceSStefano Zampini PetscCall(PetscArraycpy(pjcns_w, mumps->jcn, mumps->nnz)); 1083*9d0448ceSStefano Zampini } 1084*9d0448ceSStefano Zampini 1085*9d0448ceSStefano Zampini /* This could have been done with a single SF but it would have complicated the code a lot. 
*/ 1086*9d0448ceSStefano Zampini PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1087*9d0448ceSStefano Zampini PetscCall(PetscSFSetGraphLayout(csf, sub->cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1088*9d0448ceSStefano Zampini PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cols_idx[c], pjcns_w, MPI_REPLACE)); 1089*9d0448ceSStefano Zampini PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cols_idx[c], pjcns_w, MPI_REPLACE)); 1090*9d0448ceSStefano Zampini PetscCall(PetscSFDestroy(&csf)); 1091*9d0448ceSStefano Zampini 1092*9d0448ceSStefano Zampini /* Apply Fortran index shifting for columns */ 1093*9d0448ceSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1094*9d0448ceSStefano Zampini 1095*9d0448ceSStefano Zampini /* Shift new starting point and sanity check */ 1096*9d0448ceSStefano Zampini cumnnz += mumps->nnz; 1097*9d0448ceSStefano Zampini PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1098*9d0448ceSStefano Zampini 1099*9d0448ceSStefano Zampini /* Free scratch memory */ 1100*9d0448ceSStefano Zampini PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1101*9d0448ceSStefano Zampini PetscCall(PetscFree(mumps->val_alloc)); 1102*9d0448ceSStefano Zampini mumps->val = NULL; 1103*9d0448ceSStefano Zampini mumps->nnz = 0; 1104*9d0448ceSStefano Zampini } 1105*9d0448ceSStefano Zampini } 1106*9d0448ceSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1107*9d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1108*9d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1109*9d0448ceSStefano Zampini PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1110*9d0448ceSStefano Zampini if (!chol) PetscCheck(cumnnz 
== totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1111*9d0448ceSStefano Zampini 1112*9d0448ceSStefano Zampini /* Set pointers for final MUMPS data structure */ 1113*9d0448ceSStefano Zampini mumps->nest_vals = vals; 1114*9d0448ceSStefano Zampini mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1115*9d0448ceSStefano Zampini mumps->val = vals; 1116*9d0448ceSStefano Zampini mumps->irn = irns; 1117*9d0448ceSStefano Zampini mumps->jcn = jcns; 1118*9d0448ceSStefano Zampini mumps->nnz = cumnnz; 1119*9d0448ceSStefano Zampini } else { 1120*9d0448ceSStefano Zampini PetscScalar *oval = mumps->nest_vals; 1121*9d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 1122*9d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 1123*9d0448ceSStefano Zampini if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1124*9d0448ceSStefano Zampini mumps->val = oval + mumps->nest_vals_start[r * nc + c]; 1125*9d0448ceSStefano Zampini PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(mats[r][c], shift, MAT_REUSE_MATRIX, mumps)); 1126*9d0448ceSStefano Zampini } 1127*9d0448ceSStefano Zampini } 1128*9d0448ceSStefano Zampini mumps->val = oval; 1129*9d0448ceSStefano Zampini } 1130*9d0448ceSStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 1131*9d0448ceSStefano Zampini } 1132*9d0448ceSStefano Zampini 1133d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_MUMPS(Mat A) 1134d71ae5a4SJacob Faibussowitsch { 1135a6053eceSJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1136b24902e0SBarry Smith 1137397b6df1SKris Buschelman PetscFunctionBegin; 11389566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 11399566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 11409566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 11419566063dSJacob Faibussowitsch 
PetscCall(VecDestroy(&mumps->b_seq)); 11429566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 11439566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.perm_in)); 11449566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 11459566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->val_alloc)); 11469566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->info)); 1147413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 1148413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 11499566063dSJacob Faibussowitsch PetscCall(MatMumpsResetSchur_Private(mumps)); 1150413bcc21SPierre Jolivet if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1151a5e57a09SHong Zhang /* run MUMPS once more with job = JOB_END and error out if it reports a negative INFOG(1) */ mumps->id.job = JOB_END; 11523ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 115308401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1154413bcc21SPierre Jolivet /* give back mumps_comm: MPI_Comm_free() when PETSc's OpenMP support is in use, PetscCommRestoreComm() otherwise */ if (mumps->mumps_comm != MPI_COMM_NULL) { 1155413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1156413bcc21SPierre Jolivet else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1157413bcc21SPierre Jolivet } 1158413bcc21SPierre Jolivet } 11593ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 116067602552SJunchao Zhang if (mumps->use_petsc_omp_support) { 11619566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 11629566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 11639566063dSJacob Faibussowitsch PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 116467602552SJunchao Zhang } 11653ab56b82SJunchao Zhang #endif 11669566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 
11679566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 11689566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->recvcount)); 11699566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->reqs)); 11709566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->irhs_loc)); 1171*9d0448ceSStefano Zampini PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1172*9d0448ceSStefano Zampini PetscCall(PetscFree(mumps->nest_vals)); 11739566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1174bf0cc555SLisandro Dalcin 117597969023SHong Zhang /* clear composed functions */ 11769566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 11779566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 11789566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 11799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 11809566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 11819566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 11829566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 11839566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 11849566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 11859566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 11869566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 11875c0bae8cSAshish Patel 
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 11889566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 11899566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 11903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1191397b6df1SKris Buschelman } 1192397b6df1SKris Buschelman 119367602552SJunchao Zhang /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1194d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1195d71ae5a4SJacob Faibussowitsch { 119667602552SJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 119767602552SJunchao Zhang const PetscMPIInt ompsize = mumps->omp_comm_size; 119867602552SJunchao Zhang PetscInt i, m, M, rstart; 119967602552SJunchao Zhang 120067602552SJunchao Zhang PetscFunctionBegin; 12019566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &M, NULL)); 12029566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &m, NULL)); 120308401ef6SPierre Jolivet PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 120467602552SJunchao Zhang if (ompsize == 1) { 120567602552SJunchao Zhang if (!mumps->irhs_loc) { 120667602552SJunchao Zhang mumps->nloc_rhs = m; 12079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 12089566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 120967602552SJunchao Zhang for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 121067602552SJunchao Zhang } 121167602552SJunchao Zhang mumps->id.rhs_loc = (MumpsScalar *)array; 121267602552SJunchao Zhang } else { 121367602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 
121467602552SJunchao Zhang const PetscInt *ranges; 121567602552SJunchao Zhang PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 121667602552SJunchao Zhang MPI_Group petsc_group, omp_group; 121767602552SJunchao Zhang PetscScalar *recvbuf = NULL; 121867602552SJunchao Zhang 121967602552SJunchao Zhang if (mumps->is_omp_master) { 122067602552SJunchao Zhang /* Lazily initialize the omp stuff for distributed rhs: done only while irhs_loc is still unset */ 122167602552SJunchao Zhang if (!mumps->irhs_loc) { 12229566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 12239566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 12249566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 12259566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 122667602552SJunchao Zhang for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 12279566063dSJacob Faibussowitsch /* translate ranks 0..ompsize-1 of omp_comm into petsc_comm: petsc_ranks[j] is the petsc_comm rank of omp_comm rank j */ PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 122867602552SJunchao Zhang 122967602552SJunchao Zhang /* Populate rhs_nrow[] (number of rows of A owned by each rank of omp_comm) and mumps->irhs_loc[] (the global indices of those rows, concatenated in omp_comm rank order); nloc_rhs accumulates the total row count */ 123067602552SJunchao Zhang mumps->nloc_rhs = 0; 12319566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(A, &ranges)); 123267602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 123367602552SJunchao Zhang mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 123467602552SJunchao Zhang mumps->nloc_rhs += mumps->rhs_nrow[j]; 123567602552SJunchao Zhang } 12369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 123767602552SJunchao Zhang for (j = k = 0; j < ompsize; j++) { 123867602552SJunchao Zhang for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 123967602552SJunchao Zhang } 124067602552SJunchao Zhang 12419566063dSJacob 
Faibussowitsch PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 12429566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&petsc_group)); 12439566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&omp_group)); 124467602552SJunchao Zhang } 124567602552SJunchao Zhang 124667602552SJunchao Zhang /* Realloc buffers when current nrhs is bigger than what we have met */ 124767602552SJunchao Zhang if (nrhs > mumps->max_nrhs) { 12489566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 12499566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 125067602552SJunchao Zhang mumps->max_nrhs = nrhs; 125167602552SJunchao Zhang } 125267602552SJunchao Zhang 125367602552SJunchao Zhang /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 12549566063dSJacob Faibussowitsch for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 125567602552SJunchao Zhang mumps->rhs_disps[0] = 0; 125667602552SJunchao Zhang for (j = 1; j < ompsize; j++) { 125767602552SJunchao Zhang mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 125808401ef6SPierre Jolivet PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 125967602552SJunchao Zhang } 126067602552SJunchao Zhang recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. 
Single rhs is common in Ax=b */ 126167602552SJunchao Zhang } 126267602552SJunchao Zhang 12639566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 12649566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 126567602552SJunchao Zhang 126667602552SJunchao Zhang if (mumps->is_omp_master) { 126767602552SJunchao Zhang if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 126867602552SJunchao Zhang PetscScalar *dst, *dstbase = mumps->rhs_loc; 126967602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 127067602552SJunchao Zhang const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 127167602552SJunchao Zhang dst = dstbase; 127267602552SJunchao Zhang for (i = 0; i < nrhs; i++) { 12739566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 127467602552SJunchao Zhang src += mumps->rhs_nrow[j]; 127567602552SJunchao Zhang dst += mumps->nloc_rhs; 127667602552SJunchao Zhang } 127767602552SJunchao Zhang dstbase += mumps->rhs_nrow[j]; 127867602552SJunchao Zhang } 127967602552SJunchao Zhang } 128067602552SJunchao Zhang mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 128167602552SJunchao Zhang } 128267602552SJunchao Zhang #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 128367602552SJunchao Zhang } 128467602552SJunchao Zhang mumps->id.nrhs = nrhs; 128567602552SJunchao Zhang mumps->id.nloc_rhs = mumps->nloc_rhs; 128667602552SJunchao Zhang mumps->id.lrhs_loc = mumps->nloc_rhs; 128767602552SJunchao Zhang mumps->id.irhs_loc = mumps->irhs_loc; 12883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 128967602552SJunchao Zhang } 129067602552SJunchao Zhang 1291d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1292d71ae5a4SJacob Faibussowitsch { 1293e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 
129425aac85cSJunchao Zhang const PetscScalar *rarray = NULL; 1295d54de34fSKris Buschelman PetscScalar *array; 1296329ec9b3SHong Zhang IS is_iden, is_petsc; 1297329ec9b3SHong Zhang PetscInt i; 1298cc86f929SStefano Zampini PetscBool second_solve = PETSC_FALSE; 1299883f2eb9SBarry Smith static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1300397b6df1SKris Buschelman 1301397b6df1SKris Buschelman PetscFunctionBegin; 13029371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 13039371c9d4SSatish Balay "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 13049371c9d4SSatish Balay &cite1)); 13059371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 13069371c9d4SSatish Balay "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 13079371c9d4SSatish Balay &cite2)); 13082aca8efcSHong Zhang 1309603e8f96SBarry Smith if (A->factorerrortype) { 13109566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 13119566063dSJacob Faibussowitsch PetscCall(VecSetInf(x)); 13123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13132aca8efcSHong Zhang } 13142aca8efcSHong Zhang 1315a5e57a09SHong Zhang mumps->id.nrhs = 1; 13162d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 131725aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 131867602552SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 13199566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(b, &rarray)); 13209566063dSJacob Faibussowitsch PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 132125aac85cSJunchao Zhang } else { 132241ffd417SStefano Zampini mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 13239566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 13249566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 132567602552SJunchao Zhang if (!mumps->myid) { 13269566063dSJacob Faibussowitsch PetscCall(VecGetArray(mumps->b_seq, &array)); 132767602552SJunchao Zhang mumps->id.rhs = (MumpsScalar *)array; 132867602552SJunchao Zhang } 132925aac85cSJunchao Zhang } 13303ab56b82SJunchao Zhang } else { /* petsc_size == 1 */ 133167602552SJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 13329566063dSJacob Faibussowitsch PetscCall(VecCopy(b, x)); 13339566063dSJacob Faibussowitsch 
PetscCall(VecGetArray(x, &array)); 1334940cd9d6SSatish Balay mumps->id.rhs = (MumpsScalar *)array; 1335397b6df1SKris Buschelman } 1336397b6df1SKris Buschelman 1337cc86f929SStefano Zampini /* 1338cc86f929SStefano Zampini handle condensation step of Schur complement (if any) 1339cc86f929SStefano Zampini We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1340cc86f929SStefano Zampini According to MUMPS (5.0.0) manual, any value should be harmless during the factorization phase 1341cc86f929SStefano Zampini Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1342cc86f929SStefano Zampini This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1343cc86f929SStefano Zampini */ 1344583f777eSStefano Zampini if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 134508401ef6SPierre Jolivet PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1346cc86f929SStefano Zampini second_solve = PETSC_TRUE; 13479566063dSJacob Faibussowitsch PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1348cc86f929SStefano Zampini } 1349397b6df1SKris Buschelman /* solve phase */ 1350a5e57a09SHong Zhang mumps->id.job = JOB_SOLVE; 13513ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 135208401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1353397b6df1SKris Buschelman 1354b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 13551baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1356b5fa320bSStefano Zampini 13572d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1358a5e57a09SHong Zhang if (mumps->scat_sol && mumps->ICNTL9_pre 
!= mumps->id.ICNTL(9)) { 1359a5e57a09SHong Zhang /* when id.ICNTL(9) changes, the contents of isol_loc may change (not its size, lsol_loc), so scat_sol is destroyed here and recreated below */ 13609566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1361397b6df1SKris Buschelman } 1362a5e57a09SHong Zhang if (!mumps->scat_sol) { /* create scatter scat_sol */ 1363a6053eceSJunchao Zhang PetscInt *isol2_loc = NULL; 13649566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 13659566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1366a6053eceSJunchao Zhang for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 13679566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 13689566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 13699566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_iden)); 13709566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_petsc)); 1371a5e57a09SHong Zhang mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1372397b6df1SKris Buschelman } 1373a5e57a09SHong Zhang 13749566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 13759566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1376329ec9b3SHong Zhang } 1377353d7d71SJunchao Zhang 137867602552SJunchao Zhang if (mumps->petsc_size > 1) { 137925aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 13809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(b, &rarray)); 138125aac85cSJunchao Zhang } else if (!mumps->myid) { 13829566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mumps->b_seq, &array)); 138325aac85cSJunchao Zhang } 
13849566063dSJacob Faibussowitsch } else PetscCall(VecRestoreArray(x, &array)); 1385353d7d71SJunchao Zhang 13869566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3))); 13873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1388397b6df1SKris Buschelman } 1389397b6df1SKris Buschelman 1390d71ae5a4SJacob Faibussowitsch /* Transposed solve: runs MatSolve_MUMPS(A, b, x) with id.ICNTL(9) set to 0 and sets id.ICNTL(9) back to 1 afterwards. NOTE(review): presumably ICNTL(9) == 1 selects A x = b and any other value A^T x = b - confirm against the MUMPS users' guide. */ PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1391d71ae5a4SJacob Faibussowitsch { 1392e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 139351d5961aSHong Zhang 139451d5961aSHong Zhang PetscFunctionBegin; 1395a5e57a09SHong Zhang mumps->id.ICNTL(9) = 0; 13969566063dSJacob Faibussowitsch /* NOTE(review): if this call fails, PetscCall() presumably returns before ICNTL(9) is reset to 1 below - confirm whether that matters to callers */ PetscCall(MatSolve_MUMPS(A, b, x)); 1397a5e57a09SHong Zhang /* unconditionally 1, not the value ICNTL(9) had on entry */ mumps->id.ICNTL(9) = 1; 13983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 139951d5961aSHong Zhang } 140051d5961aSHong Zhang 1401d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1402d71ae5a4SJacob Faibussowitsch { 1403b8491c3eSStefano Zampini Mat Bt = NULL; 1404a6053eceSJunchao Zhang PetscBool denseX, denseB, flg, flgT; 1405e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1406334c5f61SHong Zhang PetscInt i, nrhs, M; 14071683a169SBarry Smith PetscScalar *array; 14081683a169SBarry Smith const PetscScalar *rbray; 1409a6053eceSJunchao Zhang PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1410a6053eceSJunchao Zhang PetscMUMPSInt *isol_loc, *isol_loc_save; 14111683a169SBarry Smith PetscScalar *bray, *sol_loc, *sol_loc_save; 1412be818407SHong Zhang IS is_to, is_from; 1413beae5ec0SHong Zhang PetscInt k, proc, j, m, myrstart; 1414be818407SHong Zhang const PetscInt *rstart; 141567602552SJunchao Zhang Vec v_mpi, msol_loc; 141667602552SJunchao Zhang VecScatter scat_sol; 141767602552SJunchao Zhang Vec b_seq; 141867602552SJunchao Zhang VecScatter scat_rhs; 1419be818407SHong Zhang PetscScalar *aa; 1420be818407SHong Zhang PetscInt spnr, *ia, *ja; 1421d56c302dSHong Zhang Mat_MPIAIJ *b = NULL; 
1422bda8bf91SBarry Smith 1423e0b74bf9SHong Zhang PetscFunctionBegin; 14249566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 142528b400f6SJacob Faibussowitsch PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1426be818407SHong Zhang 14279566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1428a6053eceSJunchao Zhang if (denseB) { 142908401ef6SPierre Jolivet PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1430be818407SHong Zhang mumps->id.ICNTL(20) = 0; /* dense RHS */ 14310e6b8875SHong Zhang } else { /* sparse B */ 143208401ef6SPierre Jolivet PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1433013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1434da81f932SPierre Jolivet if (flgT) { /* input B is transpose of actual RHS matrix, 14350e6b8875SHong Zhang because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 14369566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(B, &Bt)); 1437013e2dc7SBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1438be818407SHong Zhang mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1439b8491c3eSStefano Zampini } 144087b22cf4SHong Zhang 14419566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &M, &nrhs)); 14429481e6e9SHong Zhang mumps->id.nrhs = nrhs; 14439481e6e9SHong Zhang mumps->id.lrhs = M; 14442b691707SHong Zhang mumps->id.rhs = NULL; 14459481e6e9SHong Zhang 14462d4298aeSJunchao Zhang if (mumps->petsc_size == 1) { 1447b8491c3eSStefano Zampini PetscScalar *aa; 1448b8491c3eSStefano Zampini PetscInt spnr, *ia, *ja; 1449e94cce23SStefano Zampini PetscBool second_solve = PETSC_FALSE; 1450b8491c3eSStefano Zampini 14519566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 1452b8491c3eSStefano Zampini mumps->id.rhs = (MumpsScalar *)array; 14532b691707SHong Zhang 1454a6053eceSJunchao Zhang if (denseB) { 14552b691707SHong Zhang /* copy B to X */ 14569566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &rbray)); 14579566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 14589566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 14592b691707SHong Zhang } else { /* sparse B */ 14609566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Bt, &aa)); 14619566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 146228b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 14639566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1464b8491c3eSStefano Zampini mumps->id.rhs_sparse = (MumpsScalar *)aa; 1465b8491c3eSStefano Zampini } 
1466e94cce23SStefano Zampini /* handle condensation step of Schur complement (if any) */ 1467583f777eSStefano Zampini if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1468e94cce23SStefano Zampini second_solve = PETSC_TRUE; 14699566063dSJacob Faibussowitsch PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1470e94cce23SStefano Zampini } 14712cd7d884SHong Zhang /* solve phase */ 14722cd7d884SHong Zhang mumps->id.job = JOB_SOLVE; 14733ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 147408401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1475b5fa320bSStefano Zampini 1476b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 14771baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1478a6053eceSJunchao Zhang if (!denseB) { /* sparse B */ 14799566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 14809566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 148128b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1482b8491c3eSStefano Zampini } 14839566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 14843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1485be818407SHong Zhang } 1486801fbe65SHong Zhang 14872ef1f0ffSBarry Smith /* parallel case: MUMPS requires rhs B to be centralized on the host! 
*/ 1488aed4548fSBarry Smith PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1489241dbb5eSStefano Zampini 1490beae5ec0SHong Zhang /* create msol_loc to hold mumps local solution */ 14911683a169SBarry Smith isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 14921683a169SBarry Smith sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1493801fbe65SHong Zhang 1494a1dfcbd9SJunchao Zhang lsol_loc = mumps->id.lsol_loc; 149571aed81dSHong Zhang nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 14969566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1497940cd9d6SSatish Balay mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1498801fbe65SHong Zhang mumps->id.isol_loc = isol_loc; 1499801fbe65SHong Zhang 15009566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 15012cd7d884SHong Zhang 150267602552SJunchao Zhang if (denseB) { 150325aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 150467602552SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 15059566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &rbray)); 15069566063dSJacob Faibussowitsch PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 15079566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 15089566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 15099566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 151025aac85cSJunchao Zhang } else { 151125aac85cSJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 151280577c12SJunchao Zhang /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 151380577c12SJunchao Zhang very inefficient communication. 
An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 151480577c12SJunchao Zhang 0, re-arrange B into desired order, which is a local operation. 151580577c12SJunchao Zhang */ 151680577c12SJunchao Zhang 151767602552SJunchao Zhang /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1518be818407SHong Zhang /* wrap dense rhs matrix B into a vector v_mpi */ 15199566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 15209566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(B, &bray)); 15219566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 15229566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(B, &bray)); 15232b691707SHong Zhang 1524be818407SHong Zhang /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1525801fbe65SHong Zhang if (!mumps->myid) { 1526beae5ec0SHong Zhang PetscInt *idx; 1527beae5ec0SHong Zhang /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 15289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrhs * M, &idx)); 15299566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(B, &rstart)); 1530be818407SHong Zhang k = 0; 15312d4298aeSJunchao Zhang for (proc = 0; proc < mumps->petsc_size; proc++) { 1532be818407SHong Zhang for (j = 0; j < nrhs; j++) { 1533beae5ec0SHong Zhang for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1534be818407SHong Zhang } 1535be818407SHong Zhang } 1536be818407SHong Zhang 15379566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 15389566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 15399566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1540801fbe65SHong Zhang } else { 15419566063dSJacob Faibussowitsch 
PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 15429566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 15439566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1544801fbe65SHong Zhang } 15459566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 15469566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 15479566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 15489566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 15499566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1550801fbe65SHong Zhang 1551801fbe65SHong Zhang if (!mumps->myid) { /* define rhs on the host */ 15529566063dSJacob Faibussowitsch PetscCall(VecGetArray(b_seq, &bray)); 1553940cd9d6SSatish Balay mumps->id.rhs = (MumpsScalar *)bray; 15549566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b_seq, &bray)); 1555801fbe65SHong Zhang } 155625aac85cSJunchao Zhang } 15572b691707SHong Zhang } else { /* sparse B */ 15582b691707SHong Zhang b = (Mat_MPIAIJ *)Bt->data; 15592b691707SHong Zhang 1560be818407SHong Zhang /* wrap dense X into a vector v_mpi */ 15619566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(X, &m, NULL)); 15629566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &bray)); 15639566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 15649566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &bray)); 15652b691707SHong Zhang 15662b691707SHong Zhang if (!mumps->myid) { 15679566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(b->A, &aa)); 15689566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 156928b400f6SJacob 
Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 15709566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 15712b691707SHong Zhang mumps->id.rhs_sparse = (MumpsScalar *)aa; 15722b691707SHong Zhang } else { 15732b691707SHong Zhang mumps->id.irhs_ptr = NULL; 15742b691707SHong Zhang mumps->id.irhs_sparse = NULL; 15752b691707SHong Zhang mumps->id.nz_rhs = 0; 15762b691707SHong Zhang mumps->id.rhs_sparse = NULL; 15772b691707SHong Zhang } 15782b691707SHong Zhang } 15792b691707SHong Zhang 1580801fbe65SHong Zhang /* solve phase */ 1581801fbe65SHong Zhang mumps->id.job = JOB_SOLVE; 15823ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 158308401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1584801fbe65SHong Zhang 1585334c5f61SHong Zhang /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 15869566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 15879566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(v_mpi, array)); 1588801fbe65SHong Zhang 1589334c5f61SHong Zhang /* create scatter scat_sol */ 15909566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(X, &rstart)); 1591beae5ec0SHong Zhang /* iidx: index for scatter mumps solution to petsc X */ 1592beae5ec0SHong Zhang 15939566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 15949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1595beae5ec0SHong Zhang for (i = 0; i < lsol_loc; i++) { 1596beae5ec0SHong Zhang isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1597beae5ec0SHong Zhang 15982d4298aeSJunchao Zhang for (proc = 0; proc < mumps->petsc_size; proc++) { 1599beae5ec0SHong Zhang if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1600beae5ec0SHong Zhang myrstart = rstart[proc]; 1601beae5ec0SHong Zhang k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1602beae5ec0SHong Zhang iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1603beae5ec0SHong Zhang m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1604beae5ec0SHong Zhang break; 1605be818407SHong Zhang } 1606be818407SHong Zhang } 1607be818407SHong Zhang 1608beae5ec0SHong Zhang for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1609801fbe65SHong Zhang } 16109566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 16119566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 16129566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 16139566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 16149566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 16159566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 16169566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 161771aed81dSHong Zhang 161871aed81dSHong Zhang /* free spaces */ 16191683a169SBarry Smith mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 162071aed81dSHong Zhang mumps->id.isol_loc = isol_loc_save; 162171aed81dSHong Zhang 16229566063dSJacob Faibussowitsch PetscCall(PetscFree2(sol_loc, isol_loc)); 16239566063dSJacob Faibussowitsch PetscCall(PetscFree(idxx)); 16249566063dSJacob Faibussowitsch PetscCall(VecDestroy(&msol_loc)); 16259566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v_mpi)); 
1626a6053eceSJunchao Zhang if (!denseB) { 16272b691707SHong Zhang if (!mumps->myid) { 1628d56c302dSHong Zhang b = (Mat_MPIAIJ *)Bt->data; 16299566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 16309566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 163128b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 16322b691707SHong Zhang } 16332b691707SHong Zhang } else { 163425aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { 16359566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b_seq)); 16369566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_rhs)); 163725aac85cSJunchao Zhang } 16382b691707SHong Zhang } 16399566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_sol)); 16409566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3))); 16413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1642e0b74bf9SHong Zhang } 1643e0b74bf9SHong Zhang 1644d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1645d71ae5a4SJacob Faibussowitsch { 1646b18964edSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1647b18964edSHong Zhang PetscMUMPSInt oldvalue = mumps->id.ICNTL(9); 1648b18964edSHong Zhang 1649b18964edSHong Zhang PetscFunctionBegin; 1650b18964edSHong Zhang mumps->id.ICNTL(9) = 0; 1651b18964edSHong Zhang PetscCall(MatMatSolve_MUMPS(A, B, X)); 1652b18964edSHong Zhang mumps->id.ICNTL(9) = oldvalue; 16533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1654b18964edSHong Zhang } 1655b18964edSHong Zhang 1656d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1657d71ae5a4SJacob Faibussowitsch { 1658eb3ef3b2SHong Zhang PetscBool flg; 1659eb3ef3b2SHong Zhang Mat B; 1660eb3ef3b2SHong Zhang 1661eb3ef3b2SHong Zhang PetscFunctionBegin; 
16629566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 166328b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1664eb3ef3b2SHong Zhang 1665eb3ef3b2SHong Zhang /* Create B=Bt^T that uses Bt's data structure */ 16669566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(Bt, &B)); 1667eb3ef3b2SHong Zhang 16689566063dSJacob Faibussowitsch PetscCall(MatMatSolve_MUMPS(A, B, X)); 16699566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 16703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1671eb3ef3b2SHong Zhang } 1672eb3ef3b2SHong Zhang 1673ace3df97SHong Zhang #if !defined(PETSC_USE_COMPLEX) 1674a58c3f20SHong Zhang /* 1675a58c3f20SHong Zhang input: 1676a58c3f20SHong Zhang F: numeric factor 1677a58c3f20SHong Zhang output: 1678a58c3f20SHong Zhang nneg: total number of negative pivots 167919d49a3bSHong Zhang nzero: total number of zero pivots 168019d49a3bSHong Zhang npos: (global dimension of F) - nneg - nzero 1681a58c3f20SHong Zhang */ 1682d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1683d71ae5a4SJacob Faibussowitsch { 1684e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1685c1490034SHong Zhang PetscMPIInt size; 1686a58c3f20SHong Zhang 1687a58c3f20SHong Zhang PetscFunctionBegin; 16889566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1689bcb30aebSHong Zhang /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1690aed4548fSBarry Smith PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1691ed85ac9fSHong Zhang 1692710ac8efSHong Zhang if (nneg) *nneg = mumps->id.INFOG(12); 1693ed85ac9fSHong Zhang if (nzero || npos) { 169408401ef6SPierre Jolivet PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1695710ac8efSHong Zhang if (nzero) *nzero = mumps->id.INFOG(28); 1696710ac8efSHong Zhang if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1697a58c3f20SHong Zhang } 16983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1699a58c3f20SHong Zhang } 170019d49a3bSHong Zhang #endif 1701a58c3f20SHong Zhang 1702d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1703d71ae5a4SJacob Faibussowitsch { 1704a6053eceSJunchao Zhang PetscInt i, nreqs; 1705a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; 1706a6053eceSJunchao Zhang PetscMPIInt count; 1707a6053eceSJunchao Zhang PetscInt64 totnnz, remain; 1708a6053eceSJunchao Zhang const PetscInt osize = mumps->omp_comm_size; 1709a6053eceSJunchao Zhang PetscScalar *val; 17103ab56b82SJunchao Zhang 17113ab56b82SJunchao Zhang PetscFunctionBegin; 1712a6053eceSJunchao Zhang if (osize > 1) { 17133ab56b82SJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) { 17143ab56b82SJunchao Zhang /* master first gathers counts of nonzeros to receive */ 17159566063dSJacob Faibussowitsch if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 17169566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 17173ab56b82SJunchao Zhang 1718a6053eceSJunchao Zhang /* Then each computes number of send/recvs */ 17193ab56b82SJunchao Zhang if (mumps->is_omp_master) { 1720a6053eceSJunchao Zhang /* Start from 1 since self communication is not done in MPI */ 
1721a6053eceSJunchao Zhang nreqs = 0; 1722a6053eceSJunchao Zhang for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1723a6053eceSJunchao Zhang } else { 1724a6053eceSJunchao Zhang nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 17253ab56b82SJunchao Zhang } 172635cb6cd3SPierre Jolivet PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 17273ab56b82SJunchao Zhang 1728a6053eceSJunchao Zhang /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1729a6053eceSJunchao Zhang MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1730a6053eceSJunchao Zhang might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1731a6053eceSJunchao Zhang is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1732a6053eceSJunchao Zhang */ 1733a6053eceSJunchao Zhang nreqs = 0; /* counter for actual send/recvs */ 17343ab56b82SJunchao Zhang if (mumps->is_omp_master) { 1735a6053eceSJunchao Zhang for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 17369566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 17379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(totnnz, &val)); 1738a6053eceSJunchao Zhang 1739a6053eceSJunchao Zhang /* Self communication */ 17409566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 17419566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 17429566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1743a6053eceSJunchao Zhang 1744a6053eceSJunchao Zhang /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 17459566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->irn, 
mumps->jcn)); 17469566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->val_alloc)); 1747a6053eceSJunchao Zhang mumps->nnz = totnnz; 17483ab56b82SJunchao Zhang mumps->irn = irn; 17493ab56b82SJunchao Zhang mumps->jcn = jcn; 1750a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 1751a6053eceSJunchao Zhang 1752a6053eceSJunchao Zhang irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1753a6053eceSJunchao Zhang jcn += mumps->recvcount[0]; 1754a6053eceSJunchao Zhang val += mumps->recvcount[0]; 1755a6053eceSJunchao Zhang 1756a6053eceSJunchao Zhang /* Remote communication */ 1757a6053eceSJunchao Zhang for (i = 1; i < osize; i++) { 1758a6053eceSJunchao Zhang count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1759a6053eceSJunchao Zhang remain = mumps->recvcount[i] - count; 1760a6053eceSJunchao Zhang while (count > 0) { 17619566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 17629566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 17639566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1764a6053eceSJunchao Zhang irn += count; 1765a6053eceSJunchao Zhang jcn += count; 1766a6053eceSJunchao Zhang val += count; 1767a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1768a6053eceSJunchao Zhang remain -= count; 1769a6053eceSJunchao Zhang } 17703ab56b82SJunchao Zhang } 17713ab56b82SJunchao Zhang } else { 1772a6053eceSJunchao Zhang irn = mumps->irn; 1773a6053eceSJunchao Zhang jcn = mumps->jcn; 1774a6053eceSJunchao Zhang val = mumps->val; 1775a6053eceSJunchao Zhang count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1776a6053eceSJunchao Zhang remain = mumps->nnz - count; 1777a6053eceSJunchao Zhang while (count > 0) { 17789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(irn, 
count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 17799566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 17809566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1781a6053eceSJunchao Zhang irn += count; 1782a6053eceSJunchao Zhang jcn += count; 1783a6053eceSJunchao Zhang val += count; 1784a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1785a6053eceSJunchao Zhang remain -= count; 17863ab56b82SJunchao Zhang } 17873ab56b82SJunchao Zhang } 1788a6053eceSJunchao Zhang } else { 1789a6053eceSJunchao Zhang nreqs = 0; 1790a6053eceSJunchao Zhang if (mumps->is_omp_master) { 1791a6053eceSJunchao Zhang val = mumps->val + mumps->recvcount[0]; 1792a6053eceSJunchao Zhang for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1793a6053eceSJunchao Zhang count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1794a6053eceSJunchao Zhang remain = mumps->recvcount[i] - count; 1795a6053eceSJunchao Zhang while (count > 0) { 17969566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1797a6053eceSJunchao Zhang val += count; 1798a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1799a6053eceSJunchao Zhang remain -= count; 1800a6053eceSJunchao Zhang } 1801a6053eceSJunchao Zhang } 1802a6053eceSJunchao Zhang } else { 1803a6053eceSJunchao Zhang val = mumps->val; 1804a6053eceSJunchao Zhang count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1805a6053eceSJunchao Zhang remain = mumps->nnz - count; 1806a6053eceSJunchao Zhang while (count > 0) { 18079566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1808a6053eceSJunchao Zhang val += count; 1809a6053eceSJunchao Zhang 
count = PetscMin(remain, PETSC_MPI_INT_MAX); 1810a6053eceSJunchao Zhang remain -= count; 1811a6053eceSJunchao Zhang } 1812a6053eceSJunchao Zhang } 1813a6053eceSJunchao Zhang } 18149566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1815a6053eceSJunchao Zhang mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1816a6053eceSJunchao Zhang } 18173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18183ab56b82SJunchao Zhang } 18193ab56b82SJunchao Zhang 1820d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info) 1821d71ae5a4SJacob Faibussowitsch { 1822e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1823ace3abfcSBarry Smith PetscBool isMPIAIJ; 1824397b6df1SKris Buschelman 1825397b6df1SKris Buschelman PetscFunctionBegin; 1826dbf6bb8dSprj- if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 182748a46eb9SPierre Jolivet if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 18289566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 18293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18302aca8efcSHong Zhang } 18316baea169SHong Zhang 18329566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 18339566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1834397b6df1SKris Buschelman 1835397b6df1SKris Buschelman /* numerical factorization phase */ 1836a5e57a09SHong Zhang mumps->id.job = JOB_FACTNUMERIC; 18374e34a73bSHong Zhang if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1838ad540459SPierre Jolivet if (!mumps->myid) mumps->id.a = 
(MumpsScalar *)mumps->val; 1839397b6df1SKris Buschelman } else { 1840940cd9d6SSatish Balay mumps->id.a_loc = (MumpsScalar *)mumps->val; 1841397b6df1SKris Buschelman } 18423ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 1843a5e57a09SHong Zhang if (mumps->id.INFOG(1) < 0) { 18447a46b595SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 1845c0d63f2fSHong Zhang if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 18469566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1847603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1848c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -13) { 18499566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1850603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1851c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 18529566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1853603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 18542aca8efcSHong Zhang } else { 18559566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1856603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 1857151787a6SHong Zhang } 18582aca8efcSHong Zhang } 1859aed4548fSBarry Smith PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " 
mumps->id.ICNTL(16):=%d", mumps->id.INFOG(16)); 1860397b6df1SKris Buschelman 1861b3cb21ddSStefano Zampini F->assembled = PETSC_TRUE; 1862d47f36abSHong Zhang 1863b3cb21ddSStefano Zampini if (F->schur) { /* reset Schur status to unfactored */ 18643cb7dd0eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 1865c70f7ee4SJunchao Zhang F->schur->offloadmask = PETSC_OFFLOAD_CPU; 18663cb7dd0eSStefano Zampini #endif 1867b3cb21ddSStefano Zampini if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1868b3cb21ddSStefano Zampini mumps->id.ICNTL(19) = 2; 18699566063dSJacob Faibussowitsch PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1870b3cb21ddSStefano Zampini } 18719566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1872b3cb21ddSStefano Zampini } 187367877ebaSShri Abhyankar 1874066565c5SStefano Zampini /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1875066565c5SStefano Zampini if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1876066565c5SStefano Zampini 18773ab56b82SJunchao Zhang if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 18782d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 187967877ebaSShri Abhyankar PetscInt lsol_loc; 188067877ebaSShri Abhyankar PetscScalar *sol_loc; 18812205254eSKarl Rupp 18829566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 1883c2093ab7SHong Zhang 1884c2093ab7SHong Zhang /* distributed solution; Create x_seq=sol_loc for repeated use */ 1885c2093ab7SHong Zhang if (mumps->x_seq) { 18869566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 18879566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 18889566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 1889c2093ab7SHong Zhang } 1890a5e57a09SHong Zhang lsol_loc = mumps->id.INFO(23); /* length of 
sol_loc */ 18919566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1892a5e57a09SHong Zhang mumps->id.lsol_loc = lsol_loc; 1893940cd9d6SSatish Balay mumps->id.sol_loc = (MumpsScalar *)sol_loc; 18949566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 189567877ebaSShri Abhyankar } 18969566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 18973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1898397b6df1SKris Buschelman } 1899397b6df1SKris Buschelman 19009a2535b5SHong Zhang /* Sets MUMPS options from the options database */ 1901d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1902d71ae5a4SJacob Faibussowitsch { 1903e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1904413bcc21SPierre Jolivet PetscMUMPSInt icntl = 0, size, *listvar_schur; 190545e3843bSPierre Jolivet PetscInt info[80], i, ninfo = 80, rbs, cbs; 1906413bcc21SPierre Jolivet PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1907413bcc21SPierre Jolivet MumpsScalar *arr; 1908dcd589f8SShri Abhyankar 1909dcd589f8SShri Abhyankar PetscFunctionBegin; 191026cc229bSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1911413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1912413bcc21SPierre Jolivet PetscInt nthreads = 0; 1913413bcc21SPierre Jolivet PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1914413bcc21SPierre Jolivet PetscInt nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 1915413bcc21SPierre Jolivet 1916413bcc21SPierre Jolivet mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1917413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1918413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1919413bcc21SPierre Jolivet 1920413bcc21SPierre Jolivet PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1921413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1922413bcc21SPierre Jolivet /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1923413bcc21SPierre Jolivet PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1924413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) { 19259371c9d4SSatish Balay PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 19269371c9d4SSatish Balay ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1927413bcc21SPierre Jolivet PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 1928413bcc21SPierre Jolivet #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1929413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1930413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1931413bcc21SPierre Jolivet #endif 1932413bcc21SPierre Jolivet } else { 1933413bcc21SPierre Jolivet mumps->omp_comm = PETSC_COMM_SELF; 1934413bcc21SPierre Jolivet mumps->mumps_comm = mumps->petsc_comm; 1935413bcc21SPierre Jolivet mumps->is_omp_master = PETSC_TRUE; 1936413bcc21SPierre Jolivet } 1937413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1938413bcc21SPierre Jolivet mumps->reqs = NULL; 1939413bcc21SPierre Jolivet mumps->tag = 0; 1940413bcc21SPierre Jolivet 1941413bcc21SPierre Jolivet if (mumps->mumps_comm != MPI_COMM_NULL) { 1942413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1943413bcc21SPierre Jolivet /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 1944413bcc21SPierre Jolivet MPI_Comm comm; 1945413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 1946413bcc21SPierre Jolivet mumps->mumps_comm = comm; 1947413bcc21SPierre Jolivet } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 1948413bcc21SPierre Jolivet } 1949413bcc21SPierre Jolivet 1950413bcc21SPierre Jolivet mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1951413bcc21SPierre Jolivet mumps->id.job = JOB_INIT; 1952413bcc21SPierre Jolivet mumps->id.par = 1; /* host participates factorizaton and solve */ 1953413bcc21SPierre Jolivet mumps->id.sym = mumps->sym; 1954413bcc21SPierre Jolivet 1955413bcc21SPierre Jolivet size = mumps->id.size_schur; 1956413bcc21SPierre Jolivet arr = mumps->id.schur; 1957413bcc21SPierre Jolivet listvar_schur = mumps->id.listvar_schur; 1958413bcc21SPierre Jolivet PetscMUMPS_c(mumps); 1959413bcc21SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1960413bcc21SPierre Jolivet /* restore cached ICNTL and CNTL values */ 1961413bcc21SPierre Jolivet for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 1962413bcc21SPierre Jolivet for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 1963413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 1964413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 1965413bcc21SPierre Jolivet 1966413bcc21SPierre Jolivet if (schur) { 1967413bcc21SPierre Jolivet mumps->id.size_schur = size; 1968413bcc21SPierre Jolivet mumps->id.schur_lld = size; 1969413bcc21SPierre Jolivet mumps->id.schur = arr; 1970413bcc21SPierre Jolivet mumps->id.listvar_schur = listvar_schur; 1971413bcc21SPierre Jolivet if (mumps->petsc_size > 1) { 1972413bcc21SPierre Jolivet PetscBool gs; /* gs is false if any rank 
other than root has non-empty IS */ 1973413bcc21SPierre Jolivet 1974413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1975413bcc21SPierre Jolivet gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1976712fec58SPierre Jolivet PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 1977413bcc21SPierre Jolivet PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 1978413bcc21SPierre Jolivet } else { 1979413bcc21SPierre Jolivet if (F->factortype == MAT_FACTOR_LU) { 1980413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1981413bcc21SPierre Jolivet } else { 1982413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1983413bcc21SPierre Jolivet } 1984413bcc21SPierre Jolivet } 1985413bcc21SPierre Jolivet mumps->id.ICNTL(26) = -1; 1986413bcc21SPierre Jolivet } 1987413bcc21SPierre Jolivet 1988413bcc21SPierre Jolivet /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1989413bcc21SPierre Jolivet For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
1990413bcc21SPierre Jolivet */ 1991413bcc21SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 1992413bcc21SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 1993413bcc21SPierre Jolivet 1994413bcc21SPierre Jolivet mumps->scat_rhs = NULL; 1995413bcc21SPierre Jolivet mumps->scat_sol = NULL; 1996413bcc21SPierre Jolivet 1997413bcc21SPierre Jolivet /* set PETSc-MUMPS default options - override MUMPS default */ 1998413bcc21SPierre Jolivet mumps->id.ICNTL(3) = 0; 1999413bcc21SPierre Jolivet mumps->id.ICNTL(4) = 0; 2000413bcc21SPierre Jolivet if (mumps->petsc_size == 1) { 2001413bcc21SPierre Jolivet mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2002413bcc21SPierre Jolivet mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2003413bcc21SPierre Jolivet } else { 2004413bcc21SPierre Jolivet mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2005413bcc21SPierre Jolivet mumps->id.ICNTL(21) = 1; /* distributed solution */ 2006413bcc21SPierre Jolivet } 2007413bcc21SPierre Jolivet } 20089566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 20099a2535b5SHong Zhang if (flg) mumps->id.ICNTL(1) = icntl; 20109566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 20119a2535b5SHong Zhang if (flg) mumps->id.ICNTL(2) = icntl; 20129566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 20139a2535b5SHong Zhang if (flg) mumps->id.ICNTL(3) = icntl; 2014dcd589f8SShri Abhyankar 20159566063dSJacob Faibussowitsch 
PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 20169a2535b5SHong Zhang if (flg) mumps->id.ICNTL(4) = icntl; 20179a2535b5SHong Zhang if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 20189a2535b5SHong Zhang 20199566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 20209a2535b5SHong Zhang if (flg) mumps->id.ICNTL(6) = icntl; 20219a2535b5SHong Zhang 20229566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2023dcd589f8SShri Abhyankar if (flg) { 2024aed4548fSBarry Smith PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2025b53c1a7fSBarry Smith mumps->id.ICNTL(7) = icntl; 2026dcd589f8SShri Abhyankar } 2027e0b74bf9SHong Zhang 20289566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 20299566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 20309566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 20319566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an 
error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 20329566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 20339566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 20349566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 203545e3843bSPierre Jolivet PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 203645e3843bSPierre Jolivet if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 203745e3843bSPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 203845e3843bSPierre Jolivet if (flg) { 203945e3843bSPierre Jolivet PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 204045e3843bSPierre Jolivet PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 204145e3843bSPierre Jolivet } 20429566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 204359ac8732SStefano Zampini if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 20449566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F->schur)); 20459566063dSJacob Faibussowitsch 
PetscCall(MatMumpsResetSchur_Private(mumps)); 204659ac8732SStefano Zampini } 204725aac85cSJunchao Zhang 204843f3b051SJunchao Zhang /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 204943f3b051SJunchao Zhang and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 205025aac85cSJunchao Zhang and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 205143f3b051SJunchao Zhang This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 205243f3b051SJunchao Zhang see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 205343f3b051SJunchao Zhang In short, we could not use distributed RHS with MPICH until v4.0b1. 205425aac85cSJunchao Zhang */ 205543f3b051SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 205625aac85cSJunchao Zhang mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 205743f3b051SJunchao Zhang #else 205843f3b051SJunchao Zhang mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 205925aac85cSJunchao Zhang #endif 20609566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2061aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10", (int)mumps->ICNTL20); 206225aac85cSJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2063aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 206425aac85cSJunchao Zhang #endif 20659566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 20669a2535b5SHong Zhang 20679566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 20689566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 20699566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 20709371c9d4SSatish Balay if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2071d7ebd59bSHong Zhang 20729566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 20739566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 20749566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple 
right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 20759566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 20769566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 20779566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 20789566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 20799566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 20809566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 20819566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 20829566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 20839566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated 
compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2084dcd589f8SShri Abhyankar 20859566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 20869566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 20879566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 20889566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 20899566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 20909566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2091e5bb22a1SHong Zhang 20929566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2093b34f08ffSHong Zhang 20949566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2095b34f08ffSHong Zhang if (ninfo) { 209608401ef6SPierre Jolivet PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 20979566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2098b34f08ffSHong Zhang mumps->ninfo = ninfo; 2099b34f08ffSHong Zhang for (i = 0; i < ninfo; i++) { 2100aed4548fSBarry Smith 
PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2101f7d195e4SLawrence Mitchell mumps->info[i] = info[i]; 2102b34f08ffSHong Zhang } 2103b34f08ffSHong Zhang } 2104d0609cedSBarry Smith PetscOptionsEnd(); 21053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2106dcd589f8SShri Abhyankar } 2107dcd589f8SShri Abhyankar 2108d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2109d71ae5a4SJacob Faibussowitsch { 21105cd7cf9dSHong Zhang PetscFunctionBegin; 21115cd7cf9dSHong Zhang if (mumps->id.INFOG(1) < 0) { 21127a46b595SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1)); 21135cd7cf9dSHong Zhang if (mumps->id.INFOG(1) == -6) { 21149566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2115603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 21165cd7cf9dSHong Zhang } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 21179566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2118603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2119dbf6bb8dSprj- } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 21209566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "Empty matrix\n")); 21215cd7cf9dSHong Zhang } else { 21229566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2123603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 21245cd7cf9dSHong Zhang } 21255cd7cf9dSHong Zhang } 21263ba16761SJacob Faibussowitsch 
PetscFunctionReturn(PETSC_SUCCESS); 21275cd7cf9dSHong Zhang } 21285cd7cf9dSHong Zhang 2129d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2130d71ae5a4SJacob Faibussowitsch { 2131e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 213267877ebaSShri Abhyankar Vec b; 213367877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2134397b6df1SKris Buschelman 2135397b6df1SKris Buschelman PetscFunctionBegin; 2136d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2137d47f36abSHong Zhang /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 21383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2139d47f36abSHong Zhang } 2140dcd589f8SShri Abhyankar 21419a2535b5SHong Zhang /* Set MUMPS options from the options database */ 214226cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2143dcd589f8SShri Abhyankar 21449566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 21459566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2146dcd589f8SShri Abhyankar 214767877ebaSShri Abhyankar /* analysis phase */ 2148a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2149a5e57a09SHong Zhang mumps->id.n = M; 2150a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 215167877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2152a5e57a09SHong Zhang if (!mumps->myid) { 2153a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2154a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2155a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2156a6053eceSJunchao Zhang if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 21574ac6704cSBarry Smith if (r) { 21584ac6704cSBarry Smith mumps->id.ICNTL(7) = 1; 2159a5e57a09SHong Zhang if (!mumps->myid) { 2160e0b74bf9SHong Zhang const PetscInt *idx; 2161a6053eceSJunchao Zhang PetscInt i; 
21622205254eSKarl Rupp 21639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 21649566063dSJacob Faibussowitsch PetscCall(ISGetIndices(r, &idx)); 21659566063dSJacob Faibussowitsch for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 21669566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(r, &idx)); 2167e0b74bf9SHong Zhang } 2168e0b74bf9SHong Zhang } 216967877ebaSShri Abhyankar } 217067877ebaSShri Abhyankar break; 217167877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2172a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2173a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2174a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2175a6053eceSJunchao Zhang if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 217625aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 21779566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 21789566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 21799566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 218025aac85cSJunchao Zhang } 218167877ebaSShri Abhyankar break; 218267877ebaSShri Abhyankar } 21833ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 21849566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 218567877ebaSShri Abhyankar 2186719d5645SBarry Smith F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2187dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 218851d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 21894e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 2190eb3ef3b2SHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2191b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2192d47f36abSHong Zhang 
2193d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 21943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2195b24902e0SBarry Smith } 2196b24902e0SBarry Smith 2197450b117fSShri Abhyankar /* Note the Petsc r and c permutations are ignored */ 2198d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2199d71ae5a4SJacob Faibussowitsch { 2200e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 220167877ebaSShri Abhyankar Vec b; 220267877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2203450b117fSShri Abhyankar 2204450b117fSShri Abhyankar PetscFunctionBegin; 2205d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2206d47f36abSHong Zhang /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 22073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2208d47f36abSHong Zhang } 2209dcd589f8SShri Abhyankar 22109a2535b5SHong Zhang /* Set MUMPS options from the options database */ 221126cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2212dcd589f8SShri Abhyankar 22139566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 22149566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 221567877ebaSShri Abhyankar 221667877ebaSShri Abhyankar /* analysis phase */ 2217a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2218a5e57a09SHong Zhang mumps->id.n = M; 2219a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 222067877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2221a5e57a09SHong Zhang if (!mumps->myid) { 2222a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2223a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2224a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2225ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 222667877ebaSShri Abhyankar } 
222767877ebaSShri Abhyankar break; 222867877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2229a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2230a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2231a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2232ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 223325aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 22349566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 22359566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 22369566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 223725aac85cSJunchao Zhang } 223867877ebaSShri Abhyankar break; 223967877ebaSShri Abhyankar } 22403ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 22419566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 224267877ebaSShri Abhyankar 2243450b117fSShri Abhyankar F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2244dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 224551d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2246b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2247d47f36abSHong Zhang 2248d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 22493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2250450b117fSShri Abhyankar } 2251b24902e0SBarry Smith 2252141f4205SHong Zhang /* Note the Petsc r permutation and factor info are ignored */ 2253d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2254d71ae5a4SJacob Faibussowitsch { 2255e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 225667877ebaSShri Abhyankar Vec b; 225767877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2258397b6df1SKris Buschelman 
2259397b6df1SKris Buschelman PetscFunctionBegin; 2260d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2261d47f36abSHong Zhang /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 22623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2263d47f36abSHong Zhang } 2264dcd589f8SShri Abhyankar 22659a2535b5SHong Zhang /* Set MUMPS options from the options database */ 226626cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2267dcd589f8SShri Abhyankar 22689566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 22699566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2270dcd589f8SShri Abhyankar 227167877ebaSShri Abhyankar /* analysis phase */ 2272a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2273a5e57a09SHong Zhang mumps->id.n = M; 2274a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 227567877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2276a5e57a09SHong Zhang if (!mumps->myid) { 2277a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2278a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2279a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2280ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 228167877ebaSShri Abhyankar } 228267877ebaSShri Abhyankar break; 228367877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2284a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2285a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2286a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2287ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 228825aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 22899566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 22909566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 22919566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 229225aac85cSJunchao Zhang } 229367877ebaSShri Abhyankar break; 229467877ebaSShri Abhyankar } 22953ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 22969566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 22975cd7cf9dSHong Zhang 22982792810eSHong Zhang F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2299dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 230051d5961aSHong Zhang F->ops->solvetranspose = MatSolve_MUMPS; 23014e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 230223a5080aSHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2303b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 23044e34a73bSHong Zhang #if defined(PETSC_USE_COMPLEX) 23050298fd71SBarry Smith F->ops->getinertia = NULL; 23064e34a73bSHong Zhang #else 23074e34a73bSHong Zhang F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2308db4efbfdSBarry Smith #endif 2309d47f36abSHong Zhang 2310d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 23113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2312b24902e0SBarry Smith } 2313b24902e0SBarry Smith 2314d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2315d71ae5a4SJacob Faibussowitsch { 231664e6c443SBarry Smith PetscBool iascii; 231764e6c443SBarry Smith PetscViewerFormat format; 2318e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2319f6c57405SHong Zhang 2320f6c57405SHong Zhang PetscFunctionBegin; 232164e6c443SBarry Smith /* check if matrix is mumps type */ 23223ba16761SJacob Faibussowitsch if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 232364e6c443SBarry Smith 
23249566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 232564e6c443SBarry Smith if (iascii) { 23269566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 23271511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 23289566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 23291511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 23309566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 23319566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 23329566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 23339566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 23349566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 23359566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 23369566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 23379566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 23389566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 23399566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 23409566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): 
%d\n", mumps->id.ICNTL(10))); 23419566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2342a5e57a09SHong Zhang if (mumps->id.ICNTL(11) > 0) { 23439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 23449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 23459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 23469566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 23479566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 23489566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2349f6c57405SHong Zhang } 23509566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 23519566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 23529566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 235345e3843bSPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2354f6c57405SHong Zhang /* ICNTL(15-17) not used */ 23559566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 23569566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 23579566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 23589566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 23599566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 23609566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2361c0165424SHong Zhang 23629566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 23639566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 23649566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 23659566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 23669566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 23679566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 236842179a6aSHong Zhang 23699566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 23709566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 23719566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 23729566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 23739566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 23749566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2375f6c57405SHong Zhang 23769566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 23779566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 23789566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 23799566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 23809566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 23819566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2382f6c57405SHong Zhang 2383a5b23f4aSJose E. 
Roman /* information local to each processor */ 23849566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 23859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 23869566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 23879566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 23889566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 23899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 23909566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 23919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 23929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 23939566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2394f6c57405SHong Zhang 23959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 23969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 23979566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2398f6c57405SHong Zhang 23999566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 24009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 24019566063dSJacob Faibussowitsch 
PetscCall(PetscViewerFlush(viewer)); 2402f6c57405SHong Zhang 24039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 24049566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 24059566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2406b34f08ffSHong Zhang 2407a0e18203SThibaut Appel if (mumps->ninfo && mumps->ninfo <= 80) { 2408b34f08ffSHong Zhang PetscInt i; 2409b34f08ffSHong Zhang for (i = 0; i < mumps->ninfo; i++) { 24109566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 24119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 24129566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2413b34f08ffSHong Zhang } 2414b34f08ffSHong Zhang } 24159566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 24161511cd71SPierre Jolivet } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2417f6c57405SHong Zhang 24181511cd71SPierre Jolivet if (mumps->myid == 0) { /* information from the host */ 24199566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 24209566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 24219566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 24229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2423f6c57405SHong Zhang 24249566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 24259566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 24269566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 24279566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 24289566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 24299566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 24309566063dSJacob 
Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 24319566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 24329566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 24339566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 24349566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 24359566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 24369566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 24379566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 24389566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 24399566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 24409566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS 
internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 24419566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 24429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 24439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 24449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 24459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 24469566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 24479566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 24489566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 24499566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 24509566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 
24519566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 24529566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 24539566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 24549566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 24559566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 24569566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 24579566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2458f6c57405SHong Zhang } 2459f6c57405SHong Zhang } 2460cb828f0fSHong Zhang } 24613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2462f6c57405SHong Zhang } 2463f6c57405SHong Zhang 2464d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2465d71ae5a4SJacob Faibussowitsch { 2466e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 246735bd34faSBarry Smith 246835bd34faSBarry Smith PetscFunctionBegin; 
246935bd34faSBarry Smith info->block_size = 1.0; 2470cb828f0fSHong Zhang info->nz_allocated = mumps->id.INFOG(20); 2471cb828f0fSHong Zhang info->nz_used = mumps->id.INFOG(20); 247235bd34faSBarry Smith info->nz_unneeded = 0.0; 247335bd34faSBarry Smith info->assemblies = 0.0; 247435bd34faSBarry Smith info->mallocs = 0.0; 247535bd34faSBarry Smith info->memory = 0.0; 247635bd34faSBarry Smith info->fill_ratio_given = 0; 247735bd34faSBarry Smith info->fill_ratio_needed = 0; 247835bd34faSBarry Smith info->factor_mallocs = 0; 24793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 248035bd34faSBarry Smith } 248135bd34faSBarry Smith 2482d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2483d71ae5a4SJacob Faibussowitsch { 2484e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2485a3d589ffSStefano Zampini const PetscScalar *arr; 24868e7ba810SStefano Zampini const PetscInt *idxs; 24878e7ba810SStefano Zampini PetscInt size, i; 24886444a565SStefano Zampini 24896444a565SStefano Zampini PetscFunctionBegin; 24909566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is, &size)); 2491b3cb21ddSStefano Zampini /* Schur complement matrix */ 24929566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F->schur)); 24939566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 24949566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2495a3d589ffSStefano Zampini mumps->id.schur = (MumpsScalar *)arr; 2496a3d589ffSStefano Zampini mumps->id.size_schur = size; 2497a3d589ffSStefano Zampini mumps->id.schur_lld = size; 24989566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 249948a46eb9SPierre Jolivet if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2500b3cb21ddSStefano Zampini 2501b3cb21ddSStefano Zampini /* MUMPS expects Fortran style indices */ 25029566063dSJacob Faibussowitsch 
PetscCall(PetscFree(mumps->id.listvar_schur)); 25039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 25049566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is, &idxs)); 25059566063dSJacob Faibussowitsch for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 25069566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is, &idxs)); 250759ac8732SStefano Zampini /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2508b5fa320bSStefano Zampini mumps->id.ICNTL(26) = -1; 25093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25106444a565SStefano Zampini } 251159ac8732SStefano Zampini 2512d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2513d71ae5a4SJacob Faibussowitsch { 25146444a565SStefano Zampini Mat St; 2515e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 25166444a565SStefano Zampini PetscScalar *array; 25176444a565SStefano Zampini #if defined(PETSC_USE_COMPLEX) 25188ac429a0SStefano Zampini PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 25196444a565SStefano Zampini #endif 25206444a565SStefano Zampini 25216444a565SStefano Zampini PetscFunctionBegin; 252208401ef6SPierre Jolivet PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! 
You should call MatFactorSetSchurIS to enable it"); 25239566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 25249566063dSJacob Faibussowitsch PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 25259566063dSJacob Faibussowitsch PetscCall(MatSetType(St, MATDENSE)); 25269566063dSJacob Faibussowitsch PetscCall(MatSetUp(St)); 25279566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(St, &array)); 252859ac8732SStefano Zampini if (!mumps->sym) { /* MUMPS always return a full matrix */ 25296444a565SStefano Zampini if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 25306444a565SStefano Zampini PetscInt i, j, N = mumps->id.size_schur; 25316444a565SStefano Zampini for (i = 0; i < N; i++) { 25326444a565SStefano Zampini for (j = 0; j < N; j++) { 25336444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 25346444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j]; 25356444a565SStefano Zampini #else 25366444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 25376444a565SStefano Zampini #endif 25386444a565SStefano Zampini array[j * N + i] = val; 25396444a565SStefano Zampini } 25406444a565SStefano Zampini } 25416444a565SStefano Zampini } else { /* stored by columns */ 25429566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 25436444a565SStefano Zampini } 25446444a565SStefano Zampini } else { /* either full or lower-triangular (not packed) */ 25456444a565SStefano Zampini if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 25466444a565SStefano Zampini PetscInt i, j, N = mumps->id.size_schur; 25476444a565SStefano Zampini for (i = 0; i < N; i++) { 25486444a565SStefano Zampini for (j = i; j < N; j++) { 25496444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 25506444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j]; 
25516444a565SStefano Zampini #else 25526444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 25536444a565SStefano Zampini #endif 25546444a565SStefano Zampini array[i * N + j] = val; 25556444a565SStefano Zampini array[j * N + i] = val; 25566444a565SStefano Zampini } 25576444a565SStefano Zampini } 25586444a565SStefano Zampini } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 25599566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 25606444a565SStefano Zampini } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 25616444a565SStefano Zampini PetscInt i, j, N = mumps->id.size_schur; 25626444a565SStefano Zampini for (i = 0; i < N; i++) { 25636444a565SStefano Zampini for (j = 0; j < i + 1; j++) { 25646444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 25656444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j]; 25666444a565SStefano Zampini #else 25676444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 25686444a565SStefano Zampini #endif 25696444a565SStefano Zampini array[i * N + j] = val; 25706444a565SStefano Zampini array[j * N + i] = val; 25716444a565SStefano Zampini } 25726444a565SStefano Zampini } 25736444a565SStefano Zampini } 25746444a565SStefano Zampini } 25759566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(St, &array)); 25766444a565SStefano Zampini *S = St; 25773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25786444a565SStefano Zampini } 25796444a565SStefano Zampini 2580d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2581d71ae5a4SJacob Faibussowitsch { 2582e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 25835ccb76cbSHong Zhang 25845ccb76cbSHong Zhang PetscFunctionBegin; 2585413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { /* need to cache 
icntl and ival since PetscMUMPS_c() has never been called */ 2586413bcc21SPierre Jolivet PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 25879371c9d4SSatish Balay for (i = 0; i < nICNTL_pre; ++i) 25889371c9d4SSatish Balay if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2589413bcc21SPierre Jolivet if (i == nICNTL_pre) { /* not already cached */ 2590413bcc21SPierre Jolivet if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2591413bcc21SPierre Jolivet else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2592413bcc21SPierre Jolivet mumps->ICNTL_pre[0]++; 2593413bcc21SPierre Jolivet } 2594413bcc21SPierre Jolivet mumps->ICNTL_pre[1 + 2 * i] = icntl; 2595413bcc21SPierre Jolivet PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2596413bcc21SPierre Jolivet } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 25973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25985ccb76cbSHong Zhang } 25995ccb76cbSHong Zhang 2600d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2601d71ae5a4SJacob Faibussowitsch { 2602e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2603bc6112feSHong Zhang 2604bc6112feSHong Zhang PetscFunctionBegin; 260536df9881Sjeremy theler if (mumps->id.job == JOB_NULL) { 260636df9881Sjeremy theler PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 260736df9881Sjeremy theler *ival = 0; 260836df9881Sjeremy theler for (i = 0; i < nICNTL_pre; ++i) { 260936df9881Sjeremy theler if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 261036df9881Sjeremy theler } 261136df9881Sjeremy theler } else *ival = mumps->id.ICNTL(icntl); 26123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2613bc6112feSHong Zhang } 2614bc6112feSHong Zhang 26155ccb76cbSHong Zhang /*@ 26165ccb76cbSHong Zhang MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 26175ccb76cbSHong Zhang 2618c3339decSBarry Smith Logically Collective 26195ccb76cbSHong Zhang 26205ccb76cbSHong Zhang Input Parameters: 262111a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 26225ccb76cbSHong Zhang . icntl - index of MUMPS parameter array ICNTL() 26235ccb76cbSHong Zhang - ival - value of MUMPS ICNTL(icntl) 26245ccb76cbSHong Zhang 26253c7db156SBarry Smith Options Database Key: 2626147403d9SBarry Smith . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 26275ccb76cbSHong Zhang 26285ccb76cbSHong Zhang Level: beginner 26295ccb76cbSHong Zhang 263096a0c994SBarry Smith References: 2631606c0280SSatish Balay . 
* - MUMPS Users' Guide 26325ccb76cbSHong Zhang 26332ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 26345ccb76cbSHong Zhang @*/ 2635d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2636d71ae5a4SJacob Faibussowitsch { 26375ccb76cbSHong Zhang PetscFunctionBegin; 26382989dfd4SHong Zhang PetscValidType(F, 1); 263928b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 26405ccb76cbSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 26415ccb76cbSHong Zhang PetscValidLogicalCollectiveInt(F, ival, 3); 2642413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2643cac4c232SBarry Smith PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 26443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 26455ccb76cbSHong Zhang } 26465ccb76cbSHong Zhang 2647a21f80fcSHong Zhang /*@ 2648a21f80fcSHong Zhang MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2649a21f80fcSHong Zhang 2650c3339decSBarry Smith Logically Collective 2651a21f80fcSHong Zhang 2652a21f80fcSHong Zhang Input Parameters: 265311a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2654a21f80fcSHong Zhang - icntl - index of MUMPS parameter array ICNTL() 2655a21f80fcSHong Zhang 2656a21f80fcSHong Zhang Output Parameter: 2657a21f80fcSHong Zhang . ival - value of MUMPS ICNTL(icntl) 2658a21f80fcSHong Zhang 2659a21f80fcSHong Zhang Level: beginner 2660a21f80fcSHong Zhang 266196a0c994SBarry Smith References: 2662606c0280SSatish Balay . 
* - MUMPS Users' Guide 2663a21f80fcSHong Zhang 26642ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2665a21f80fcSHong Zhang @*/ 2666d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2667d71ae5a4SJacob Faibussowitsch { 2668bc6112feSHong Zhang PetscFunctionBegin; 26692989dfd4SHong Zhang PetscValidType(F, 1); 267028b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2671bc6112feSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 2672bc6112feSHong Zhang PetscValidIntPointer(ival, 3); 2673413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2674cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 26753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2676bc6112feSHong Zhang } 2677bc6112feSHong Zhang 2678d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2679d71ae5a4SJacob Faibussowitsch { 2680e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 26818928b65cSHong Zhang 26828928b65cSHong Zhang PetscFunctionBegin; 2683413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { 2684413bcc21SPierre Jolivet PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 26859371c9d4SSatish Balay for (i = 0; i < nCNTL_pre; ++i) 26869371c9d4SSatish Balay if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2687413bcc21SPierre Jolivet if (i == nCNTL_pre) { 2688413bcc21SPierre Jolivet if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2689413bcc21SPierre Jolivet else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2690413bcc21SPierre Jolivet mumps->CNTL_pre[0]++; 2691413bcc21SPierre Jolivet } 2692413bcc21SPierre Jolivet mumps->CNTL_pre[1 + 2 * i] = icntl; 2693413bcc21SPierre Jolivet mumps->CNTL_pre[2 + 2 * i] = val; 2694413bcc21SPierre Jolivet } else mumps->id.CNTL(icntl) = val; 26953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 26968928b65cSHong Zhang } 26978928b65cSHong Zhang 2698d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2699d71ae5a4SJacob Faibussowitsch { 2700e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2701bc6112feSHong Zhang 2702bc6112feSHong Zhang PetscFunctionBegin; 270336df9881Sjeremy theler if (mumps->id.job == JOB_NULL) { 270436df9881Sjeremy theler PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 270536df9881Sjeremy theler *val = 0.0; 270636df9881Sjeremy theler for (i = 0; i < nCNTL_pre; ++i) { 270736df9881Sjeremy theler if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 270836df9881Sjeremy theler } 270936df9881Sjeremy theler } else *val = mumps->id.CNTL(icntl); 27103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2711bc6112feSHong Zhang } 2712bc6112feSHong Zhang 27138928b65cSHong Zhang /*@ 27148928b65cSHong Zhang MatMumpsSetCntl - Set MUMPS parameter CNTL() 27158928b65cSHong Zhang 2716c3339decSBarry Smith Logically Collective 27178928b65cSHong Zhang 27188928b65cSHong Zhang Input Parameters: 271911a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 27208928b65cSHong Zhang . icntl - index of MUMPS parameter array CNTL() 27218928b65cSHong Zhang - val - value of MUMPS CNTL(icntl) 27228928b65cSHong Zhang 27233c7db156SBarry Smith Options Database Key: 2724147403d9SBarry Smith . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 27258928b65cSHong Zhang 27268928b65cSHong Zhang Level: beginner 27278928b65cSHong Zhang 272896a0c994SBarry Smith References: 2729606c0280SSatish Balay . 
* - MUMPS Users' Guide 27308928b65cSHong Zhang 27312ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 27328928b65cSHong Zhang @*/ 2733d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2734d71ae5a4SJacob Faibussowitsch { 27358928b65cSHong Zhang PetscFunctionBegin; 27362989dfd4SHong Zhang PetscValidType(F, 1); 273728b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 27388928b65cSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 2739bc6112feSHong Zhang PetscValidLogicalCollectiveReal(F, val, 3); 2740413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2741cac4c232SBarry Smith PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 27423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27438928b65cSHong Zhang } 27448928b65cSHong Zhang 2745a21f80fcSHong Zhang /*@ 2746a21f80fcSHong Zhang MatMumpsGetCntl - Get MUMPS parameter CNTL() 2747a21f80fcSHong Zhang 2748c3339decSBarry Smith Logically Collective 2749a21f80fcSHong Zhang 2750a21f80fcSHong Zhang Input Parameters: 275111a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2752a21f80fcSHong Zhang - icntl - index of MUMPS parameter array CNTL() 2753a21f80fcSHong Zhang 2754a21f80fcSHong Zhang Output Parameter: 2755a21f80fcSHong Zhang . val - value of MUMPS CNTL(icntl) 2756a21f80fcSHong Zhang 2757a21f80fcSHong Zhang Level: beginner 2758a21f80fcSHong Zhang 275996a0c994SBarry Smith References: 2760606c0280SSatish Balay . 
* - MUMPS Users' Guide 2761a21f80fcSHong Zhang 27622ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2763a21f80fcSHong Zhang @*/ 2764d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2765d71ae5a4SJacob Faibussowitsch { 2766bc6112feSHong Zhang PetscFunctionBegin; 27672989dfd4SHong Zhang PetscValidType(F, 1); 276828b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2769bc6112feSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 2770bc6112feSHong Zhang PetscValidRealPointer(val, 3); 2771413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2772cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 27733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2774bc6112feSHong Zhang } 2775bc6112feSHong Zhang 2776d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2777d71ae5a4SJacob Faibussowitsch { 2778e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2779bc6112feSHong Zhang 2780bc6112feSHong Zhang PetscFunctionBegin; 2781bc6112feSHong Zhang *info = mumps->id.INFO(icntl); 27823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2783bc6112feSHong Zhang } 2784bc6112feSHong Zhang 2785d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2786d71ae5a4SJacob Faibussowitsch { 2787e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2788bc6112feSHong Zhang 2789bc6112feSHong Zhang PetscFunctionBegin; 2790bc6112feSHong Zhang *infog = 
mumps->id.INFOG(icntl); 27913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2792bc6112feSHong Zhang } 2793bc6112feSHong Zhang 2794d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2795d71ae5a4SJacob Faibussowitsch { 2796e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2797bc6112feSHong Zhang 2798bc6112feSHong Zhang PetscFunctionBegin; 2799bc6112feSHong Zhang *rinfo = mumps->id.RINFO(icntl); 28003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2801bc6112feSHong Zhang } 2802bc6112feSHong Zhang 2803d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2804d71ae5a4SJacob Faibussowitsch { 2805e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2806bc6112feSHong Zhang 2807bc6112feSHong Zhang PetscFunctionBegin; 2808bc6112feSHong Zhang *rinfog = mumps->id.RINFOG(icntl); 28093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2810bc6112feSHong Zhang } 2811bc6112feSHong Zhang 28125c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 28135c0bae8cSAshish Patel { 28145c0bae8cSAshish Patel Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 28155c0bae8cSAshish Patel 28165c0bae8cSAshish Patel PetscFunctionBegin; 28175c0bae8cSAshish Patel PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 28185c0bae8cSAshish Patel *size = 0; 28195c0bae8cSAshish Patel *array = NULL; 28205c0bae8cSAshish Patel if (!mumps->myid) { 28215c0bae8cSAshish Patel *size = mumps->id.INFOG(28); 28225c0bae8cSAshish Patel PetscCall(PetscMalloc1(*size, array)); 28235c0bae8cSAshish Patel for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 28245c0bae8cSAshish Patel } 28255c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 28265c0bae8cSAshish Patel } 28275c0bae8cSAshish 
Patel 2828d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2829d71ae5a4SJacob Faibussowitsch { 28300e6b8875SHong Zhang Mat Bt = NULL, Btseq = NULL; 28310e6b8875SHong Zhang PetscBool flg; 2832bb599dfdSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2833bb599dfdSHong Zhang PetscScalar *aa; 2834f410b75aSHong Zhang PetscInt spnr, *ia, *ja, M, nrhs; 2835bb599dfdSHong Zhang 2836bb599dfdSHong Zhang PetscFunctionBegin; 2837064a246eSJacob Faibussowitsch PetscValidPointer(spRHS, 2); 2838013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 28390e6b8875SHong Zhang if (flg) { 28409566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2841013e2dc7SBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2842bb599dfdSHong Zhang 28439566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2844bb599dfdSHong Zhang 28452d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 28460e6b8875SHong Zhang Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 28470e6b8875SHong Zhang Btseq = b->A; 28480e6b8875SHong Zhang } else { 28490e6b8875SHong Zhang Btseq = Bt; 28500e6b8875SHong Zhang } 28510e6b8875SHong Zhang 28529566063dSJacob Faibussowitsch PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2853f410b75aSHong Zhang mumps->id.nrhs = nrhs; 2854f410b75aSHong Zhang mumps->id.lrhs = M; 2855f410b75aSHong Zhang mumps->id.rhs = NULL; 2856f410b75aSHong Zhang 2857e3f2db6aSHong Zhang if (!mumps->myid) { 28589566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 28599566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 286028b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 28619566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, 
ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2862bb599dfdSHong Zhang mumps->id.rhs_sparse = (MumpsScalar *)aa; 2863e3f2db6aSHong Zhang } else { 2864e3f2db6aSHong Zhang mumps->id.irhs_ptr = NULL; 2865e3f2db6aSHong Zhang mumps->id.irhs_sparse = NULL; 2866e3f2db6aSHong Zhang mumps->id.nz_rhs = 0; 2867e3f2db6aSHong Zhang mumps->id.rhs_sparse = NULL; 2868e3f2db6aSHong Zhang } 2869bb599dfdSHong Zhang mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2870e3f2db6aSHong Zhang mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2871bb599dfdSHong Zhang 2872bb599dfdSHong Zhang /* solve phase */ 2873bb599dfdSHong Zhang mumps->id.job = JOB_SOLVE; 28743ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 2875049d1499SBarry Smith PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 287614267174SHong Zhang 2877e3f2db6aSHong Zhang if (!mumps->myid) { 28789566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 28799566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 288028b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2881e3f2db6aSHong Zhang } 28823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2883bb599dfdSHong Zhang } 2884bb599dfdSHong Zhang 2885bb599dfdSHong Zhang /*@ 28862ef1f0ffSBarry Smith MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2887bb599dfdSHong Zhang 2888c3339decSBarry Smith Logically Collective 2889bb599dfdSHong Zhang 289020f4b53cSBarry Smith Input Parameter: 289120f4b53cSBarry Smith . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2892bb599dfdSHong Zhang 2893bb599dfdSHong Zhang Output Parameter: 289420f4b53cSBarry Smith . 
spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverseTranspose()`
@*/
PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* only a factor matrix carries the MUMPS solver data */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  /* forward to the function composed on F under "MatMumpsGetInverse_C" */
  PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Implementation behind "MatMumpsGetInverseTranspose_C": wrap spRHST with MatCreateTranspose(),
  pass the wrapper to MatMumpsGetInverse_MUMPS(), then destroy the wrapper.
*/
PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
{
  Mat spRHS;

  PetscFunctionBegin;
  PetscCall(MatCreateTranspose(spRHST, &spRHS));
  PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
  /* drops only the temporary wrapper created above */
  PetscCall(MatDestroy(&spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T

  Logically Collective

  Input Parameter:
.
F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface

  Output Parameter:
. spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
@*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
{
  PetscBool flg;

  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* only a factor matrix carries the MUMPS solver data */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  /* spRHST must be exactly MATSEQAIJ or MATMPIAIJ */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");

  /* forward to the function composed on F under "MatMumpsGetInverseTranspose_C" */
  PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInfo - Get MUMPS parameter INFO()

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS
parameter array INFO()

  Output Parameter:
. ival - value of MUMPS INFO(icntl)

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* only a factor matrix carries the MUMPS solver data */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidIntPointer(ival, 3);
  /* forward to the function composed on F under "MatMumpsGetInfo_C" */
  PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInfog - Get MUMPS parameter INFOG()

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array INFOG()

  Output Parameter:
. ival - value of MUMPS INFOG(icntl)

  Level: beginner

  References:
.
* - MUMPS Users' Guide

.seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* only a factor matrix carries the MUMPS solver data */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidIntPointer(ival, 3);
  /* forward to the function composed on F under "MatMumpsGetInfog_C" */
  PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetRinfo - Get MUMPS parameter RINFO()

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array RINFO()

  Output Parameter:
. val - value of MUMPS RINFO(icntl)

  Level: beginner

  References:
.
* - MUMPS Users' Guide

.seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* only a factor matrix carries the MUMPS solver data */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidRealPointer(val, 3);
  /* forward to the function composed on F under "MatMumpsGetRinfo_C" */
  PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetRinfog - Get MUMPS parameter RINFOG()

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array RINFOG()

  Output Parameter:
. val - value of MUMPS RINFOG(icntl)

  Level: beginner

  References:
.
* - MUMPS Users' Guide

.seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* only a factor matrix carries the MUMPS solver data */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidRealPointer(val, 3);
  /* forward to the function composed on F under "MatMumpsGetRinfog_C" */
  PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST()

  Logically Collective

  Input Parameter:
. F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface

  Output Parameters:
+ size  - local size of the array. The size of the array is non-zero only on the host.
- array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
          for freeing this array with `PetscFree()`.

  Level: beginner

  References:
.
* - MUMPS Users' Guide 30885c0bae8cSAshish Patel 30895c0bae8cSAshish Patel .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 30905c0bae8cSAshish Patel @*/ 30915c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 30925c0bae8cSAshish Patel { 30935c0bae8cSAshish Patel PetscFunctionBegin; 30945c0bae8cSAshish Patel PetscValidType(F, 1); 30955c0bae8cSAshish Patel PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 30965c0bae8cSAshish Patel PetscValidIntPointer(size, 3); 30975c0bae8cSAshish Patel PetscValidPointer(array, 4); 30985c0bae8cSAshish Patel PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 30995c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 31005c0bae8cSAshish Patel } 31015c0bae8cSAshish Patel 310224b6179bSKris Buschelman /*MC 31032692d6eeSBarry Smith MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 310424b6179bSKris Buschelman distributed and sequential matrices via the external package MUMPS. 310524b6179bSKris Buschelman 310611a5261eSBarry Smith Works with `MATAIJ` and `MATSBAIJ` matrices 310724b6179bSKris Buschelman 3108c2b89b5dSBarry Smith Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3109c2b89b5dSBarry Smith 31102ef1f0ffSBarry Smith Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 31112ef1f0ffSBarry Smith See details below. 
3112217d3b1eSJunchao Zhang 31132ef1f0ffSBarry Smith Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3114c2b89b5dSBarry Smith 311524b6179bSKris Buschelman Options Database Keys: 31164422a9fcSPatrick Sanan + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 31174422a9fcSPatrick Sanan . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 31184422a9fcSPatrick Sanan . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 31194422a9fcSPatrick Sanan . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 31204422a9fcSPatrick Sanan . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3121b53c1a7fSBarry Smith . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3122b53c1a7fSBarry Smith Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 31234422a9fcSPatrick Sanan . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 31244422a9fcSPatrick Sanan . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 31254422a9fcSPatrick Sanan . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 31264422a9fcSPatrick Sanan . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 31274422a9fcSPatrick Sanan . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 31284422a9fcSPatrick Sanan . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 312945e3843bSPierre Jolivet . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 31304422a9fcSPatrick Sanan . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 313125aac85cSJunchao Zhang . 
-mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 31324422a9fcSPatrick Sanan . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 31334422a9fcSPatrick Sanan . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor 31344422a9fcSPatrick Sanan . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 31354422a9fcSPatrick Sanan . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 31364422a9fcSPatrick Sanan . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 31374422a9fcSPatrick Sanan . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 31384422a9fcSPatrick Sanan . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 31394422a9fcSPatrick Sanan . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 31404422a9fcSPatrick Sanan . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 31414422a9fcSPatrick Sanan . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3142a0e18203SThibaut Appel . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3143a0e18203SThibaut Appel . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3144a0e18203SThibaut Appel . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 31454422a9fcSPatrick Sanan . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 31464422a9fcSPatrick Sanan . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 31474422a9fcSPatrick Sanan . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 31484422a9fcSPatrick Sanan . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3149217d3b1eSJunchao Zhang . 
-mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3150a0e18203SThibaut Appel . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3151217d3b1eSJunchao Zhang - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3152217d3b1eSJunchao Zhang Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 315324b6179bSKris Buschelman 315424b6179bSKris Buschelman Level: beginner 315524b6179bSKris Buschelman 315695452b02SPatrick Sanan Notes: 31572ef1f0ffSBarry Smith MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 31582ef1f0ffSBarry Smith error if the matrix is Hermitian. 315938548759SBarry Smith 316026cc229bSBarry Smith When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 316126cc229bSBarry Smith `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 316226cc229bSBarry Smith 31632ef1f0ffSBarry Smith When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 31642ef1f0ffSBarry Smith the failure with 31652ef1f0ffSBarry Smith .vb 31662ef1f0ffSBarry Smith KSPGetPC(ksp,&pc); 31672ef1f0ffSBarry Smith PCFactorGetMatrix(pc,&mat); 31682ef1f0ffSBarry Smith MatMumpsGetInfo(mat,....); 31692ef1f0ffSBarry Smith MatMumpsGetInfog(mat,....); etc. 31702ef1f0ffSBarry Smith .ve 31712ef1f0ffSBarry Smith Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 
31729fc87aa7SBarry Smith 3173a5399872SJunchao Zhang MUMPS provides 64-bit integer support in two build modes: 3174a5399872SJunchao Zhang full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3175a5399872SJunchao Zhang requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 31768fcaa860SBarry Smith 3177a5399872SJunchao Zhang selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3178a5399872SJunchao Zhang MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and 3179a5399872SJunchao Zhang columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3180a5399872SJunchao Zhang integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3181a5399872SJunchao Zhang 3182a5399872SJunchao Zhang With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3183a5399872SJunchao Zhang 3184a5399872SJunchao Zhang Two modes to run MUMPS/PETSc with OpenMP 31852ef1f0ffSBarry Smith .vb 31862ef1f0ffSBarry Smith Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 31872ef1f0ffSBarry Smith threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 31882ef1f0ffSBarry Smith .ve 31898fcaa860SBarry Smith 31902ef1f0ffSBarry Smith .vb 31912ef1f0ffSBarry Smith -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. 
For example, 31922ef1f0ffSBarry Smith if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 31932ef1f0ffSBarry Smith .ve 31948fcaa860SBarry Smith 31958fcaa860SBarry Smith To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 31962ef1f0ffSBarry Smith (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 31972ef1f0ffSBarry Smith (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 31988fcaa860SBarry Smith libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 31998fcaa860SBarry Smith (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 3200217d3b1eSJunchao Zhang 32018fcaa860SBarry Smith If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3202217d3b1eSJunchao Zhang processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3203217d3b1eSJunchao Zhang size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3204217d3b1eSJunchao Zhang are called slave ranks (or slaves). Only master ranks are visible to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 3205217d3b1eSJunchao Zhang by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3206217d3b1eSJunchao Zhang In a multi-socket compute node, MPI rank mapping is an issue. 
Still use the above example and suppose your compute node has two sockets, 3207217d3b1eSJunchao Zhang if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3208217d3b1eSJunchao Zhang MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3209217d3b1eSJunchao Zhang cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3210217d3b1eSJunchao Zhang problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 32118fcaa860SBarry Smith For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3212217d3b1eSJunchao Zhang examine the mapping result. 3213217d3b1eSJunchao Zhang 321411a5261eSBarry Smith PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 321511a5261eSBarry Smith for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 321611a5261eSBarry Smith calls `omp_set_num_threads`(m) internally before calling MUMPS. 3217217d3b1eSJunchao Zhang 3218217d3b1eSJunchao Zhang References: 3219606c0280SSatish Balay + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3220606c0280SSatish Balay - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 
3221217d3b1eSJunchao Zhang 32222ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 322324b6179bSKris Buschelman M*/ 322424b6179bSKris Buschelman 3225d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3226d71ae5a4SJacob Faibussowitsch { 322735bd34faSBarry Smith PetscFunctionBegin; 32282692d6eeSBarry Smith *type = MATSOLVERMUMPS; 32293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 323035bd34faSBarry Smith } 323135bd34faSBarry Smith 3232bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI AIJ matrices */ 3233d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3234d71ae5a4SJacob Faibussowitsch { 32352877fffaSHong Zhang Mat B; 32362877fffaSHong Zhang Mat_MUMPS *mumps; 3237ace3abfcSBarry Smith PetscBool isSeqAIJ; 32382c7c0729SBarry Smith PetscMPIInt size; 32392877fffaSHong Zhang 32402877fffaSHong Zhang PetscFunctionBegin; 3241eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 3242b94d7dedSBarry Smith PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3243eb1ec7c1SStefano Zampini #endif 32442877fffaSHong Zhang /* Create the factorization matrix */ 32459566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 32469566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 32479566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 32489566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", 
&((PetscObject)B)->type_name)); 32499566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 32502877fffaSHong Zhang 32514dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 32522205254eSKarl Rupp 32532877fffaSHong Zhang B->ops->view = MatView_MUMPS; 325435bd34faSBarry Smith B->ops->getinfo = MatGetInfo_MUMPS; 32552205254eSKarl Rupp 32569566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 32579566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 32589566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 32599566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 32609566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 32619566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 32629566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 32639566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 32649566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 32659566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 32669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 32675c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", 
MatMumpsGetNullPivots_MUMPS)); 32689566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 32699566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 32706444a565SStefano Zampini 3271450b117fSShri Abhyankar if (ftype == MAT_FACTOR_LU) { 3272450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3273d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 3274bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3275bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 32769566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3277746480a1SHong Zhang mumps->sym = 0; 3278dcd589f8SShri Abhyankar } else { 327967877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3280450b117fSShri Abhyankar B->factortype = MAT_FACTOR_CHOLESKY; 3281bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3282bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 32839566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 328459ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 328559ac8732SStefano Zampini mumps->sym = 2; 328659ac8732SStefano Zampini #else 3287b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 32886fdc2a6dSBarry Smith else mumps->sym = 2; 328959ac8732SStefano Zampini #endif 3290450b117fSShri Abhyankar } 32912877fffaSHong Zhang 329200c67f3bSHong Zhang /* set solvertype */ 32939566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 32949566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, 
&B->solvertype)); 32959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 32962c7c0729SBarry Smith if (size == 1) { 32974ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3298f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 32992c7c0729SBarry Smith } 33002877fffaSHong Zhang B->ops->destroy = MatDestroy_MUMPS; 3301e69c285eSBarry Smith B->data = (void *)mumps; 33022205254eSKarl Rupp 33032877fffaSHong Zhang *F = B; 3304413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3305413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3306413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3307d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 33083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 33092877fffaSHong Zhang } 33102877fffaSHong Zhang 3311bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3312d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3313d71ae5a4SJacob Faibussowitsch { 33142877fffaSHong Zhang Mat B; 33152877fffaSHong Zhang Mat_MUMPS *mumps; 3316ace3abfcSBarry Smith PetscBool isSeqSBAIJ; 33172c7c0729SBarry Smith PetscMPIInt size; 33182877fffaSHong Zhang 33192877fffaSHong Zhang PetscFunctionBegin; 3320eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 3321b94d7dedSBarry Smith PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3322eb1ec7c1SStefano Zampini #endif 33239566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 33249566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 33259566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 33269566063dSJacob Faibussowitsch 
PetscCall(MatSetUp(B)); 3327e69c285eSBarry Smith 33284dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 33299566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3330bccb9932SShri Abhyankar if (isSeqSBAIJ) { 333116ebf90aSShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3332dcd589f8SShri Abhyankar } else { 3333bccb9932SShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3334bccb9932SShri Abhyankar } 3335bccb9932SShri Abhyankar 333667877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3337bccb9932SShri Abhyankar B->ops->view = MatView_MUMPS; 3338722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 33392205254eSKarl Rupp 33409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 33419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 33429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 33439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 33449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 33459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 33469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 33479566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 33489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 33499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 33509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 33515c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 33529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 33539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 33542205254eSKarl Rupp 3355f4762488SHong Zhang B->factortype = MAT_FACTOR_CHOLESKY; 335659ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 335759ac8732SStefano Zampini mumps->sym = 2; 335859ac8732SStefano Zampini #else 3359b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 33606fdc2a6dSBarry Smith else mumps->sym = 2; 336159ac8732SStefano Zampini #endif 3362a214ac2aSShri Abhyankar 336300c67f3bSHong Zhang /* set solvertype */ 33649566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 33659566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 33669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 33672c7c0729SBarry Smith if (size == 1) { 33684ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3369f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 33702c7c0729SBarry Smith } 33719566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3372f3c0ef26SHong Zhang B->ops->destroy = MatDestroy_MUMPS; 3373e69c285eSBarry Smith B->data = (void *)mumps; 
33742205254eSKarl Rupp 33752877fffaSHong Zhang *F = B; 3376413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3377413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3378413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3379d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 33803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 33812877fffaSHong Zhang } 338297969023SHong Zhang 3383d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3384d71ae5a4SJacob Faibussowitsch { 338567877ebaSShri Abhyankar Mat B; 338667877ebaSShri Abhyankar Mat_MUMPS *mumps; 3387ace3abfcSBarry Smith PetscBool isSeqBAIJ; 33882c7c0729SBarry Smith PetscMPIInt size; 338967877ebaSShri Abhyankar 339067877ebaSShri Abhyankar PetscFunctionBegin; 339167877ebaSShri Abhyankar /* Create the factorization matrix */ 33929566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 33939566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 33949566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 33959566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 33969566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3397450b117fSShri Abhyankar 33984dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 3399450b117fSShri Abhyankar if (ftype == MAT_FACTOR_LU) { 3400450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3401450b117fSShri Abhyankar B->factortype = MAT_FACTOR_LU; 3402bccb9932SShri Abhyankar if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3403bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3404746480a1SHong Zhang mumps->sym = 0; 34059566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char 
**)&B->preferredordering[MAT_FACTOR_LU])); 3406546078acSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3407bccb9932SShri Abhyankar 3408450b117fSShri Abhyankar B->ops->view = MatView_MUMPS; 3409722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 34102205254eSKarl Rupp 34119566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 34129566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 34139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 34149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 34159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 34169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 34179566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 34189566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 34199566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 34209566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 34219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 34225c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 34239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 34249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3425450b117fSShri Abhyankar 342600c67f3bSHong Zhang /* set solvertype */ 34279566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 34289566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 34299566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 34302c7c0729SBarry Smith if (size == 1) { 34314ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3432f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 34332c7c0729SBarry Smith } 34347ee00b23SStefano Zampini B->ops->destroy = MatDestroy_MUMPS; 34357ee00b23SStefano Zampini B->data = (void *)mumps; 34367ee00b23SStefano Zampini 34377ee00b23SStefano Zampini *F = B; 3438413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3439413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3440413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3441d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 34423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34437ee00b23SStefano Zampini } 34447ee00b23SStefano Zampini 34457ee00b23SStefano Zampini /* MatGetFactor for Seq and MPI SELL matrices */ 3446d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3447d71ae5a4SJacob Faibussowitsch { 34487ee00b23SStefano Zampini Mat B; 34497ee00b23SStefano Zampini Mat_MUMPS *mumps; 34507ee00b23SStefano Zampini PetscBool isSeqSELL; 34512c7c0729SBarry Smith PetscMPIInt size; 34527ee00b23SStefano Zampini 34537ee00b23SStefano Zampini PetscFunctionBegin; 
34547ee00b23SStefano Zampini /* Create the factorization matrix */ 34559566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 34569566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 34579566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 34589566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 34599566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 34607ee00b23SStefano Zampini 34614dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 34627ee00b23SStefano Zampini 34637ee00b23SStefano Zampini B->ops->view = MatView_MUMPS; 34647ee00b23SStefano Zampini B->ops->getinfo = MatGetInfo_MUMPS; 34657ee00b23SStefano Zampini 34669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 34679566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 34689566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 34699566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 34709566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 34719566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 34729566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 34739566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 34749566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 34759566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 34769566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 34775c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 34787ee00b23SStefano Zampini 34797ee00b23SStefano Zampini if (ftype == MAT_FACTOR_LU) { 34807ee00b23SStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 34817ee00b23SStefano Zampini B->factortype = MAT_FACTOR_LU; 34827ee00b23SStefano Zampini if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 34837ee00b23SStefano Zampini else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 34847ee00b23SStefano Zampini mumps->sym = 0; 34859566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 34867ee00b23SStefano Zampini } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 34877ee00b23SStefano Zampini 34887ee00b23SStefano Zampini /* set solvertype */ 34899566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 34909566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 34919566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 34922c7c0729SBarry Smith if (size == 1) { 34934ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3494f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 34952c7c0729SBarry Smith } 3496450b117fSShri Abhyankar B->ops->destroy = MatDestroy_MUMPS; 3497e69c285eSBarry Smith B->data = (void *)mumps; 34982205254eSKarl 
Rupp 3499450b117fSShri Abhyankar *F = B; 3500413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3501413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3502413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3503d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 35043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3505450b117fSShri Abhyankar } 350642c9c57cSBarry Smith 3507*9d0448ceSStefano Zampini /* MatGetFactor for MATNEST matrices */ 3508*9d0448ceSStefano Zampini static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3509*9d0448ceSStefano Zampini { 3510*9d0448ceSStefano Zampini Mat B, **mats; 3511*9d0448ceSStefano Zampini Mat_MUMPS *mumps; 3512*9d0448ceSStefano Zampini PetscInt nr, nc; 3513*9d0448ceSStefano Zampini PetscMPIInt size; 3514*9d0448ceSStefano Zampini 3515*9d0448ceSStefano Zampini PetscFunctionBegin; 3516*9d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX) 3517*9d0448ceSStefano Zampini PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported"); 3518*9d0448ceSStefano Zampini #endif 3519*9d0448ceSStefano Zampini 3520*9d0448ceSStefano Zampini /* Errors if some condition is not satisfied */ 3521*9d0448ceSStefano Zampini PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3522*9d0448ceSStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY) { 3523*9d0448ceSStefano Zampini IS *rows, *cols; 3524*9d0448ceSStefano Zampini PetscBool flg = PETSC_TRUE; 3525*9d0448ceSStefano Zampini PetscInt *m, *M; 3526*9d0448ceSStefano Zampini 3527*9d0448ceSStefano Zampini PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". 
Use MAT_FACTOR_LU.", nr, nc); 3528*9d0448ceSStefano Zampini PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3529*9d0448ceSStefano Zampini PetscCall(MatNestGetISs(A, rows, cols)); 3530*9d0448ceSStefano Zampini for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3531*9d0448ceSStefano Zampini if (!flg) PetscCall(PetscFree2(rows, cols)); 3532*9d0448ceSStefano Zampini PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU."); 3533*9d0448ceSStefano Zampini PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3534*9d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3535*9d0448ceSStefano Zampini for (PetscInt r = 0; flg && r < nr; r++) 3536*9d0448ceSStefano Zampini for (PetscInt k = r + 1; flg && k < nr; k++) 3537*9d0448ceSStefano Zampini if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3538*9d0448ceSStefano Zampini PetscCall(PetscFree2(m, M)); 3539*9d0448ceSStefano Zampini PetscCall(PetscFree2(rows, cols)); 3540*9d0448ceSStefano Zampini PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. 
Use MAT_FACTOR_LU."); 3541*9d0448ceSStefano Zampini } 3542*9d0448ceSStefano Zampini 3543*9d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 3544*9d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 3545*9d0448ceSStefano Zampini Mat sub = mats[r][c]; 3546*9d0448ceSStefano Zampini PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ; 3547*9d0448ceSStefano Zampini 3548*9d0448ceSStefano Zampini if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3549*9d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3550*9d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3551*9d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3552*9d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3553*9d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3554*9d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3555*9d0448ceSStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY) { 3556*9d0448ceSStefano Zampini if (r == c) PetscCheck(isSeqAIJ || isMPIAIJ || isSeqSBAIJ || isMPISBAIJ, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for diagonal block of type %s", ((PetscObject)sub)->type_name); 3557*9d0448ceSStefano Zampini else PetscCheck(isSeqAIJ || isMPIAIJ || isSeqBAIJ || isMPIBAIJ, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for offdiagonal block of type %s", ((PetscObject)sub)->type_name); 3558*9d0448ceSStefano Zampini } else PetscCheck(isSeqAIJ || isMPIAIJ || isSeqBAIJ || isMPIBAIJ, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 3559*9d0448ceSStefano Zampini } 3560*9d0448ceSStefano Zampini } 3561*9d0448ceSStefano Zampini 3562*9d0448ceSStefano Zampini /* 
Create the factorization matrix */ 3563*9d0448ceSStefano Zampini PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3564*9d0448ceSStefano Zampini PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3565*9d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3566*9d0448ceSStefano Zampini PetscCall(MatSetUp(B)); 3567*9d0448ceSStefano Zampini 3568*9d0448ceSStefano Zampini PetscCall(PetscNew(&mumps)); 3569*9d0448ceSStefano Zampini 3570*9d0448ceSStefano Zampini B->ops->view = MatView_MUMPS; 3571*9d0448ceSStefano Zampini B->ops->getinfo = MatGetInfo_MUMPS; 3572*9d0448ceSStefano Zampini 3573*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3574*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3575*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3576*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3577*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3578*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3579*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3580*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3581*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3582*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", 
MatMumpsGetRinfo_MUMPS)); 3583*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3584*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3585*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3586*9d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3587*9d0448ceSStefano Zampini 3588*9d0448ceSStefano Zampini if (ftype == MAT_FACTOR_LU) { 3589*9d0448ceSStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3590*9d0448ceSStefano Zampini B->factortype = MAT_FACTOR_LU; 3591*9d0448ceSStefano Zampini mumps->sym = 0; 3592*9d0448ceSStefano Zampini } else { 3593*9d0448ceSStefano Zampini B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3594*9d0448ceSStefano Zampini B->factortype = MAT_FACTOR_CHOLESKY; 3595*9d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX) 3596*9d0448ceSStefano Zampini mumps->sym = 2; 3597*9d0448ceSStefano Zampini #else 3598*9d0448ceSStefano Zampini if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3599*9d0448ceSStefano Zampini else mumps->sym = 2; 3600*9d0448ceSStefano Zampini #endif 3601*9d0448ceSStefano Zampini } 3602*9d0448ceSStefano Zampini mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3603*9d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3604*9d0448ceSStefano Zampini 3605*9d0448ceSStefano Zampini PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3606*9d0448ceSStefano Zampini if (size == 1) { 3607*9d0448ceSStefano Zampini /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3608*9d0448ceSStefano Zampini B->canuseordering = 
PETSC_TRUE; 3609*9d0448ceSStefano Zampini } 3610*9d0448ceSStefano Zampini 3611*9d0448ceSStefano Zampini /* set solvertype */ 3612*9d0448ceSStefano Zampini PetscCall(PetscFree(B->solvertype)); 3613*9d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3614*9d0448ceSStefano Zampini B->ops->destroy = MatDestroy_MUMPS; 3615*9d0448ceSStefano Zampini B->data = (void *)mumps; 3616*9d0448ceSStefano Zampini 3617*9d0448ceSStefano Zampini *F = B; 3618*9d0448ceSStefano Zampini mumps->id.job = JOB_NULL; 3619*9d0448ceSStefano Zampini mumps->ICNTL_pre = NULL; 3620*9d0448ceSStefano Zampini mumps->CNTL_pre = NULL; 3621*9d0448ceSStefano Zampini mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3622*9d0448ceSStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 3623*9d0448ceSStefano Zampini } 3624*9d0448ceSStefano Zampini 3625d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3626d71ae5a4SJacob Faibussowitsch { 362742c9c57cSBarry Smith PetscFunctionBegin; 36289566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 36299566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 36309566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 36319566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 36329566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 36339566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 36349566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 
36359566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 36369566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 36379566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 36389566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3639*9d0448ceSStefano Zampini PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3640*9d0448ceSStefano Zampini PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 36413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 364242c9c57cSBarry Smith } 3643