11c2a3de1SBarry Smith 2397b6df1SKris Buschelman /* 3c2b5dc30SHong Zhang Provides an interface to the MUMPS sparse solver 4397b6df1SKris Buschelman */ 567602552SJunchao Zhang #include <petscpkg_version.h> 69d0448ceSStefano Zampini #include <petscsf.h> 7c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 8c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 97ee00b23SStefano Zampini #include <../src/mat/impls/sell/mpi/mpisell.h> 10397b6df1SKris Buschelman 11397b6df1SKris Buschelman EXTERN_C_BEGIN 12397b6df1SKris Buschelman #if defined(PETSC_USE_COMPLEX) 132907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 142907cef9SHong Zhang #include <cmumps_c.h> 152907cef9SHong Zhang #else 16c6db04a5SJed Brown #include <zmumps_c.h> 172907cef9SHong Zhang #endif 182907cef9SHong Zhang #else 192907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 202907cef9SHong Zhang #include <smumps_c.h> 21397b6df1SKris Buschelman #else 22c6db04a5SJed Brown #include <dmumps_c.h> 23397b6df1SKris Buschelman #endif 242907cef9SHong Zhang #endif 25397b6df1SKris Buschelman EXTERN_C_END 26397b6df1SKris Buschelman #define JOB_INIT -1 27413bcc21SPierre Jolivet #define JOB_NULL 0 283d472b54SHong Zhang #define JOB_FACTSYMBOLIC 1 293d472b54SHong Zhang #define JOB_FACTNUMERIC 2 303d472b54SHong Zhang #define JOB_SOLVE 3 31397b6df1SKris Buschelman #define JOB_END -2 323d472b54SHong Zhang 332907cef9SHong Zhang /* calls to MUMPS */ 342907cef9SHong Zhang #if defined(PETSC_USE_COMPLEX) 352907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 363ab56b82SJunchao Zhang #define MUMPS_c cmumps_c 372907cef9SHong Zhang #else 383ab56b82SJunchao Zhang #define MUMPS_c zmumps_c 392907cef9SHong Zhang #endif 402907cef9SHong Zhang #else 412907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 423ab56b82SJunchao Zhang #define MUMPS_c smumps_c 432907cef9SHong Zhang #else 443ab56b82SJunchao Zhang #define MUMPS_c dmumps_c 452907cef9SHong Zhang #endif 462907cef9SHong Zhang #endif 
472907cef9SHong Zhang 48a6053eceSJunchao Zhang /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 49a6053eceSJunchao Zhang number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 50a6053eceSJunchao Zhang naming convention in PetscMPIInt, PetscBLASInt etc. 51a6053eceSJunchao Zhang */ 52a6053eceSJunchao Zhang typedef MUMPS_INT PetscMUMPSInt; 53a6053eceSJunchao Zhang 5467602552SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 5567602552SJunchao Zhang #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 56a6053eceSJunchao Zhang #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 5767602552SJunchao Zhang #endif 58a6053eceSJunchao Zhang #else 5967602552SJunchao Zhang #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 6067602552SJunchao Zhang #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 6167602552SJunchao Zhang #endif 6267602552SJunchao Zhang #endif 6367602552SJunchao Zhang 64a6053eceSJunchao Zhang #define MPIU_MUMPSINT MPI_INT 65a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MAX 2147483647 66a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MIN -2147483648 67a6053eceSJunchao Zhang 68a6053eceSJunchao Zhang /* Cast PetscInt to PetscMUMPSInt. 
Usually there is no overflow since <a> is row/col indices or some small integers*/ 69d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 70d71ae5a4SJacob Faibussowitsch { 71a6053eceSJunchao Zhang PetscFunctionBegin; 72ece88022SPierre Jolivet #if PetscDefined(USE_64BIT_INDICES) 732c71b3e2SJacob Faibussowitsch PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 74ece88022SPierre Jolivet #endif 75a6053eceSJunchao Zhang *b = (PetscMUMPSInt)(a); 763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 77a6053eceSJunchao Zhang } 78a6053eceSJunchao Zhang 79a6053eceSJunchao Zhang /* Put these utility routines here since they are only used in this file */ 80d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 81d71ae5a4SJacob Faibussowitsch { 82a6053eceSJunchao Zhang PetscInt myval; 83a6053eceSJunchao Zhang PetscBool myset; 84a6053eceSJunchao Zhang PetscFunctionBegin; 85a6053eceSJunchao Zhang /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 869566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 879566063dSJacob Faibussowitsch if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 88a6053eceSJunchao Zhang if (set) *set = myset; 893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 90a6053eceSJunchao Zhang } 91a6053eceSJunchao Zhang #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 92a6053eceSJunchao Zhang 93217d3b1eSJunchao Zhang /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 943ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 953ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 963ab56b82SJunchao Zhang do { \ 973ab56b82SJunchao Zhang if (mumps->use_petsc_omp_support) { \ 983ab56b82SJunchao Zhang if (mumps->is_omp_master) { \ 999566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 1003ab56b82SJunchao Zhang MUMPS_c(&mumps->id); \ 1019566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 1023ab56b82SJunchao Zhang } \ 1039566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 104c3714a1dSJunchao Zhang /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 105c3714a1dSJunchao Zhang to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 106c3714a1dSJunchao Zhang an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 107c3714a1dSJunchao Zhang omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. 
\ 108c3714a1dSJunchao Zhang */ \ 1099566063dSJacob Faibussowitsch PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 1109566063dSJacob Faibussowitsch PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \ 1119566063dSJacob Faibussowitsch PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 1123ab56b82SJunchao Zhang } else { \ 1133ab56b82SJunchao Zhang MUMPS_c(&mumps->id); \ 1143ab56b82SJunchao Zhang } \ 1153ab56b82SJunchao Zhang } while (0) 1163ab56b82SJunchao Zhang #else 1173ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 118d71ae5a4SJacob Faibussowitsch do { \ 119d71ae5a4SJacob Faibussowitsch MUMPS_c(&mumps->id); \ 120d71ae5a4SJacob Faibussowitsch } while (0) 1213ab56b82SJunchao Zhang #endif 1223ab56b82SJunchao Zhang 123940cd9d6SSatish Balay /* declare MumpsScalar */ 124940cd9d6SSatish Balay #if defined(PETSC_USE_COMPLEX) 125940cd9d6SSatish Balay #if defined(PETSC_USE_REAL_SINGLE) 126940cd9d6SSatish Balay #define MumpsScalar mumps_complex 127940cd9d6SSatish Balay #else 128940cd9d6SSatish Balay #define MumpsScalar mumps_double_complex 129940cd9d6SSatish Balay #endif 130940cd9d6SSatish Balay #else 131940cd9d6SSatish Balay #define MumpsScalar PetscScalar 132940cd9d6SSatish Balay #endif 1333d472b54SHong Zhang 134397b6df1SKris Buschelman /* macros s.t. 
indices match MUMPS documentation */ 135397b6df1SKris Buschelman #define ICNTL(I) icntl[(I)-1] 136397b6df1SKris Buschelman #define CNTL(I) cntl[(I)-1] 137397b6df1SKris Buschelman #define INFOG(I) infog[(I)-1] 138a7aca84bSHong Zhang #define INFO(I) info[(I)-1] 139397b6df1SKris Buschelman #define RINFOG(I) rinfog[(I)-1] 140adc1d99fSHong Zhang #define RINFO(I) rinfo[(I)-1] 141397b6df1SKris Buschelman 142a6053eceSJunchao Zhang typedef struct Mat_MUMPS Mat_MUMPS; 143a6053eceSJunchao Zhang struct Mat_MUMPS { 144397b6df1SKris Buschelman #if defined(PETSC_USE_COMPLEX) 1452907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 1462907cef9SHong Zhang CMUMPS_STRUC_C id; 1472907cef9SHong Zhang #else 148397b6df1SKris Buschelman ZMUMPS_STRUC_C id; 1492907cef9SHong Zhang #endif 1502907cef9SHong Zhang #else 1512907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 1522907cef9SHong Zhang SMUMPS_STRUC_C id; 153397b6df1SKris Buschelman #else 154397b6df1SKris Buschelman DMUMPS_STRUC_C id; 155397b6df1SKris Buschelman #endif 1562907cef9SHong Zhang #endif 1572907cef9SHong Zhang 158397b6df1SKris Buschelman MatStructure matstruc; 1592d4298aeSJunchao Zhang PetscMPIInt myid, petsc_size; 160a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 161a6053eceSJunchao Zhang PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 162a6053eceSJunchao Zhang PetscInt64 nnz; /* number of nonzeros. 
The type is called selective 64-bit in mumps */ 163a6053eceSJunchao Zhang PetscMUMPSInt sym; 1642d4298aeSJunchao Zhang MPI_Comm mumps_comm; 165413bcc21SPierre Jolivet PetscMUMPSInt *ICNTL_pre; 166413bcc21SPierre Jolivet PetscReal *CNTL_pre; 167a6053eceSJunchao Zhang PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 168801fbe65SHong Zhang VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 16925aac85cSJunchao Zhang PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 17067602552SJunchao Zhang PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 17167602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 17267602552SJunchao Zhang PetscInt *rhs_nrow, max_nrhs; 17367602552SJunchao Zhang PetscMPIInt *rhs_recvcounts, *rhs_disps; 17467602552SJunchao Zhang PetscScalar *rhs_loc, *rhs_recvbuf; 17567602552SJunchao Zhang #endif 176801fbe65SHong Zhang Vec b_seq, x_seq; 177a6053eceSJunchao Zhang PetscInt ninfo, *info; /* which INFO to display */ 178b5fa320bSStefano Zampini PetscInt sizeredrhs; 17959ac8732SStefano Zampini PetscScalar *schur_sol; 18059ac8732SStefano Zampini PetscInt schur_sizesol; 181a6053eceSJunchao Zhang PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 182a6053eceSJunchao Zhang PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 183a6053eceSJunchao Zhang PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 1842205254eSKarl Rupp 1859d0448ceSStefano Zampini /* Support for MATNEST */ 1869d0448ceSStefano Zampini PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 1879d0448ceSStefano Zampini PetscInt64 *nest_vals_start; 1889d0448ceSStefano Zampini PetscScalar *nest_vals; 1899d0448ceSStefano Zampini 190a6053eceSJunchao Zhang /* stuff used by petsc/mumps OpenMP support*/ 1913ab56b82SJunchao Zhang PetscBool use_petsc_omp_support; 192da81f932SPierre Jolivet PetscOmpCtrl omp_ctrl; /* an OpenMP 
controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 1933ab56b82SJunchao Zhang MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 194a6053eceSJunchao Zhang PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 195a6053eceSJunchao Zhang PetscMPIInt tag, omp_comm_size; 1963ab56b82SJunchao Zhang PetscBool is_omp_master; /* is this rank the master of omp_comm */ 197a6053eceSJunchao Zhang MPI_Request *reqs; 198a6053eceSJunchao Zhang }; 1993ab56b82SJunchao Zhang 200a6053eceSJunchao Zhang /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 201a6053eceSJunchao Zhang Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 202a6053eceSJunchao Zhang */ 203d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 204d71ae5a4SJacob Faibussowitsch { 205a6053eceSJunchao Zhang PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 206f0c56d0fSKris Buschelman 207a6053eceSJunchao Zhang PetscFunctionBegin; 208a6053eceSJunchao Zhang #if defined(PETSC_USE_64BIT_INDICES) 209a6053eceSJunchao Zhang { 210a6053eceSJunchao Zhang PetscInt i; 211a6053eceSJunchao Zhang if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 2129566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 2139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 214a6053eceSJunchao Zhang mumps->cur_ilen = nrow + 1; 215a6053eceSJunchao Zhang } 216a6053eceSJunchao Zhang if (nnz > mumps->cur_jlen) { 2179566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 2189566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 219a6053eceSJunchao Zhang mumps->cur_jlen = nnz; 220a6053eceSJunchao Zhang } 2219566063dSJacob Faibussowitsch for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 2229566063dSJacob Faibussowitsch for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 223a6053eceSJunchao Zhang *ia_mumps = mumps->ia_alloc; 224a6053eceSJunchao Zhang *ja_mumps = mumps->ja_alloc; 225a6053eceSJunchao Zhang } 226a6053eceSJunchao Zhang #else 227a6053eceSJunchao Zhang *ia_mumps = ia; 228a6053eceSJunchao Zhang *ja_mumps = ja; 229a6053eceSJunchao Zhang #endif 2309566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 2313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 232a6053eceSJunchao Zhang } 233b24902e0SBarry Smith 234d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 235d71ae5a4SJacob Faibussowitsch { 236b5fa320bSStefano Zampini PetscFunctionBegin; 2379566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.listvar_schur)); 2389566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 2399566063dSJacob 
Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 24059ac8732SStefano Zampini mumps->id.size_schur = 0; 241b3cb21ddSStefano Zampini mumps->id.schur_lld = 0; 24259ac8732SStefano Zampini mumps->id.ICNTL(19) = 0; 2433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24459ac8732SStefano Zampini } 24559ac8732SStefano Zampini 246b3cb21ddSStefano Zampini /* solve with rhs in mumps->id.redrhs and return in the same location */ 247d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 248d71ae5a4SJacob Faibussowitsch { 249b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 250b3cb21ddSStefano Zampini Mat S, B, X; 251b3cb21ddSStefano Zampini MatFactorSchurStatus schurstatus; 252b3cb21ddSStefano Zampini PetscInt sizesol; 25359ac8732SStefano Zampini 25459ac8732SStefano Zampini PetscFunctionBegin; 2559566063dSJacob Faibussowitsch PetscCall(MatFactorFactorizeSchurComplement(F)); 2569566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 2579566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 2589566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 259a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2609566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(B, S->boundtocpu)); 261a3d589ffSStefano Zampini #endif 262b3cb21ddSStefano Zampini switch (schurstatus) { 263d71ae5a4SJacob Faibussowitsch case MAT_FACTOR_SCHUR_FACTORED: 264d71ae5a4SJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 265d71ae5a4SJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 266a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2679566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, 
S->boundtocpu)); 268a3d589ffSStefano Zampini #endif 269b3cb21ddSStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 2709566063dSJacob Faibussowitsch PetscCall(MatMatSolveTranspose(S, B, X)); 27159ac8732SStefano Zampini } else { 2729566063dSJacob Faibussowitsch PetscCall(MatMatSolve(S, B, X)); 27359ac8732SStefano Zampini } 274b3cb21ddSStefano Zampini break; 275b3cb21ddSStefano Zampini case MAT_FACTOR_SCHUR_INVERTED: 276b3cb21ddSStefano Zampini sizesol = mumps->id.nrhs * mumps->id.size_schur; 27759ac8732SStefano Zampini if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 2789566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 2799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 28059ac8732SStefano Zampini mumps->schur_sizesol = sizesol; 281b5fa320bSStefano Zampini } 2829566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 2839566063dSJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 284a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2859566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, S->boundtocpu)); 286a3d589ffSStefano Zampini #endif 2879566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 28859ac8732SStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 2899566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 290b5fa320bSStefano Zampini } else { 2919566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 292b5fa320bSStefano Zampini } 2939566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(X)); 2949566063dSJacob Faibussowitsch PetscCall(MatProductSymbolic(X)); 2959566063dSJacob Faibussowitsch PetscCall(MatProductNumeric(X)); 2964417c5e8SHong Zhang 2979566063dSJacob Faibussowitsch PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 298b3cb21ddSStefano 
Zampini break; 299d71ae5a4SJacob Faibussowitsch default: 300d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 30159ac8732SStefano Zampini } 3029566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 3039566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 3049566063dSJacob Faibussowitsch PetscCall(MatDestroy(&X)); 3053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 306b5fa320bSStefano Zampini } 307b5fa320bSStefano Zampini 308d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 309d71ae5a4SJacob Faibussowitsch { 310b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 311b5fa320bSStefano Zampini 312b5fa320bSStefano Zampini PetscFunctionBegin; 313b5fa320bSStefano Zampini if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 3143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 315b5fa320bSStefano Zampini } 316b8f61ee1SStefano Zampini if (!expansion) { /* prepare for the condensation step */ 317b5fa320bSStefano Zampini PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 318b5fa320bSStefano Zampini /* allocate MUMPS internal array to store reduced right-hand sides */ 319b5fa320bSStefano Zampini if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 3209566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 321b5fa320bSStefano Zampini mumps->id.lredrhs = mumps->id.size_schur; 3229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 323b5fa320bSStefano Zampini mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 324b5fa320bSStefano Zampini } 325b5fa320bSStefano Zampini mumps->id.ICNTL(26) = 1; /* condensation phase */ 326b5fa320bSStefano Zampini } else { /* prepare for the expansion step */ 327b8f61ee1SStefano Zampini /* 
solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 3289566063dSJacob Faibussowitsch PetscCall(MatMumpsSolveSchur_Private(F)); 329b5fa320bSStefano Zampini mumps->id.ICNTL(26) = 2; /* expansion phase */ 3303ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 33108401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 332b5fa320bSStefano Zampini /* restore defaults */ 333b5fa320bSStefano Zampini mumps->id.ICNTL(26) = -1; 334d3d598ffSStefano Zampini /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 335d3d598ffSStefano Zampini if (mumps->id.nrhs > 1) { 3369566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.redrhs)); 337d3d598ffSStefano Zampini mumps->id.lredrhs = 0; 338d3d598ffSStefano Zampini mumps->sizeredrhs = 0; 339d3d598ffSStefano Zampini } 340b5fa320bSStefano Zampini } 3413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 342b5fa320bSStefano Zampini } 343b5fa320bSStefano Zampini 344397b6df1SKris Buschelman /* 345d341cd04SHong Zhang MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 346d341cd04SHong Zhang 347397b6df1SKris Buschelman input: 34875480915SPierre Jolivet A - matrix in aij,baij or sbaij format 349397b6df1SKris Buschelman shift - 0: C style output triple; 1: Fortran style output triple. 
350bccb9932SShri Abhyankar reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 351bccb9932SShri Abhyankar MAT_REUSE_MATRIX: only the values in v array are updated 352397b6df1SKris Buschelman output: 353397b6df1SKris Buschelman nnz - dim of r, c, and v (number of local nonzero entries of A) 354397b6df1SKris Buschelman r, c, v - row and col index, matrix values (matrix triples) 355eb9baa12SBarry Smith 356eb9baa12SBarry Smith The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 3577ee00b23SStefano Zampini freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 358eb9baa12SBarry Smith that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 359eb9baa12SBarry Smith 360397b6df1SKris Buschelman */ 36116ebf90aSShri Abhyankar 362d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 363d71ae5a4SJacob Faibussowitsch { 364a3d589ffSStefano Zampini const PetscScalar *av; 365185f6596SHong Zhang const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 366a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j, k; 367a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 36816ebf90aSShri Abhyankar Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 369397b6df1SKris Buschelman 370397b6df1SKris Buschelman PetscFunctionBegin; 3719566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 372a6053eceSJunchao Zhang mumps->val = (PetscScalar *)av; 373bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 3742205254eSKarl Rupp nz = aa->nz; 3752205254eSKarl Rupp ai = aa->i; 3762205254eSKarl Rupp aj = aa->j; 3779566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 378a6053eceSJunchao Zhang for (i = k = 0; i < M; i++) { 37916ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 38067877ebaSShri Abhyankar ajj = aj + ai[i]; 38167877ebaSShri Abhyankar 
for (j = 0; j < rnz; j++) { 3829566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 3839566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 384a6053eceSJunchao Zhang k++; 38516ebf90aSShri Abhyankar } 38616ebf90aSShri Abhyankar } 387a6053eceSJunchao Zhang mumps->irn = row; 388a6053eceSJunchao Zhang mumps->jcn = col; 389a6053eceSJunchao Zhang mumps->nnz = nz; 39016ebf90aSShri Abhyankar } 3919566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 3923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 39316ebf90aSShri Abhyankar } 394397b6df1SKris Buschelman 395d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 396d71ae5a4SJacob Faibussowitsch { 397a6053eceSJunchao Zhang PetscInt64 nz, i, j, k, r; 3987ee00b23SStefano Zampini Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 399a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 4007ee00b23SStefano Zampini 4017ee00b23SStefano Zampini PetscFunctionBegin; 402a6053eceSJunchao Zhang mumps->val = a->val; 4037ee00b23SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 4047ee00b23SStefano Zampini nz = a->sliidx[a->totalslices]; 4059566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 406a6053eceSJunchao Zhang for (i = k = 0; i < a->totalslices; i++) { 40748a46eb9SPierre Jolivet for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 4087ee00b23SStefano Zampini } 4099566063dSJacob Faibussowitsch for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 410a6053eceSJunchao Zhang mumps->irn = row; 411a6053eceSJunchao Zhang mumps->jcn = col; 412a6053eceSJunchao Zhang mumps->nnz = nz; 4137ee00b23SStefano Zampini } 4143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4157ee00b23SStefano Zampini } 4167ee00b23SStefano Zampini 
417d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 418d71ae5a4SJacob Faibussowitsch { 41967877ebaSShri Abhyankar Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 42033d57670SJed Brown const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 421a6053eceSJunchao Zhang PetscInt64 M, nz, idx = 0, rnz, i, j, k, m; 422a6053eceSJunchao Zhang PetscInt bs; 423a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 42467877ebaSShri Abhyankar 42567877ebaSShri Abhyankar PetscFunctionBegin; 4269566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 42733d57670SJed Brown M = A->rmap->N / bs; 428a6053eceSJunchao Zhang mumps->val = aa->a; 429bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 4309371c9d4SSatish Balay ai = aa->i; 4319371c9d4SSatish Balay aj = aa->j; 43267877ebaSShri Abhyankar nz = bs2 * aa->nz; 4339566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 43467877ebaSShri Abhyankar for (i = 0; i < M; i++) { 43567877ebaSShri Abhyankar ajj = aj + ai[i]; 43667877ebaSShri Abhyankar rnz = ai[i + 1] - ai[i]; 43767877ebaSShri Abhyankar for (k = 0; k < rnz; k++) { 43867877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 43967877ebaSShri Abhyankar for (m = 0; m < bs; m++) { 4409566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 4419566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 442a6053eceSJunchao Zhang idx++; 44367877ebaSShri Abhyankar } 44467877ebaSShri Abhyankar } 44567877ebaSShri Abhyankar } 44667877ebaSShri Abhyankar } 447a6053eceSJunchao Zhang mumps->irn = row; 448a6053eceSJunchao Zhang mumps->jcn = col; 449a6053eceSJunchao Zhang mumps->nnz = nz; 45067877ebaSShri Abhyankar } 4513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 45267877ebaSShri Abhyankar } 45367877ebaSShri Abhyankar 454d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, 
PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 455d71ae5a4SJacob Faibussowitsch { 45675480915SPierre Jolivet const PetscInt *ai, *aj, *ajj; 457a6053eceSJunchao Zhang PetscInt bs; 458a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j, k, m; 459a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 46075480915SPierre Jolivet PetscScalar *val; 46116ebf90aSShri Abhyankar Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 46275480915SPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 46338548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 464b94d7dedSBarry Smith PetscBool isset, hermitian; 46538548759SBarry Smith #endif 46616ebf90aSShri Abhyankar 46716ebf90aSShri Abhyankar PetscFunctionBegin; 46838548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 469b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 470b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 47138548759SBarry Smith #endif 4722205254eSKarl Rupp ai = aa->i; 4732205254eSKarl Rupp aj = aa->j; 4749566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 47575480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 476f3fa974cSJacob Faibussowitsch const PetscInt64 alloc_size = aa->nz * bs2; 477f3fa974cSJacob Faibussowitsch 478f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 479a6053eceSJunchao Zhang if (bs > 1) { 480f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 481a6053eceSJunchao Zhang mumps->val = mumps->val_alloc; 48275480915SPierre Jolivet } else { 483a6053eceSJunchao Zhang mumps->val = aa->a; 48475480915SPierre Jolivet } 485a6053eceSJunchao Zhang mumps->irn = row; 486a6053eceSJunchao Zhang mumps->jcn = col; 487a6053eceSJunchao Zhang } else { 488a6053eceSJunchao Zhang if (bs == 1) mumps->val = aa->a; 489a6053eceSJunchao Zhang row = mumps->irn; 490a6053eceSJunchao Zhang col = 
mumps->jcn; 491a6053eceSJunchao Zhang } 492a6053eceSJunchao Zhang val = mumps->val; 493185f6596SHong Zhang 49416ebf90aSShri Abhyankar nz = 0; 495a81fe166SPierre Jolivet if (bs > 1) { 49675480915SPierre Jolivet for (i = 0; i < mbs; i++) { 49716ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 49867877ebaSShri Abhyankar ajj = aj + ai[i]; 49975480915SPierre Jolivet for (j = 0; j < rnz; j++) { 50075480915SPierre Jolivet for (k = 0; k < bs; k++) { 50175480915SPierre Jolivet for (m = 0; m < bs; m++) { 502ec4f40fdSPierre Jolivet if (ajj[j] > i || k >= m) { 50375480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 5049566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 5059566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 50675480915SPierre Jolivet } 50775480915SPierre Jolivet val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 50875480915SPierre Jolivet } 50975480915SPierre Jolivet } 51075480915SPierre Jolivet } 51175480915SPierre Jolivet } 51275480915SPierre Jolivet } 513a81fe166SPierre Jolivet } else if (reuse == MAT_INITIAL_MATRIX) { 514a81fe166SPierre Jolivet for (i = 0; i < mbs; i++) { 515a81fe166SPierre Jolivet rnz = ai[i + 1] - ai[i]; 516a81fe166SPierre Jolivet ajj = aj + ai[i]; 517a81fe166SPierre Jolivet for (j = 0; j < rnz; j++) { 5189566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 5199566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 520a6053eceSJunchao Zhang nz++; 521a81fe166SPierre Jolivet } 522a81fe166SPierre Jolivet } 52308401ef6SPierre Jolivet PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 52475480915SPierre Jolivet } 525a6053eceSJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 5263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 52716ebf90aSShri Abhyankar } 52816ebf90aSShri Abhyankar 
529d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 530d71ae5a4SJacob Faibussowitsch { 53167877ebaSShri Abhyankar const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 532a6053eceSJunchao Zhang PetscInt64 nz, rnz, i, j; 53367877ebaSShri Abhyankar const PetscScalar *av, *v1; 53416ebf90aSShri Abhyankar PetscScalar *val; 535a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 536829b1710SHong Zhang Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 53729b521d4Sstefano_zampini PetscBool missing; 53838548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 539b94d7dedSBarry Smith PetscBool hermitian, isset; 54038548759SBarry Smith #endif 54116ebf90aSShri Abhyankar 54216ebf90aSShri Abhyankar PetscFunctionBegin; 54338548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 544b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 545b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 54638548759SBarry Smith #endif 5479566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 5489371c9d4SSatish Balay ai = aa->i; 5499371c9d4SSatish Balay aj = aa->j; 55016ebf90aSShri Abhyankar adiag = aa->diag; 5519566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 552bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 5537ee00b23SStefano Zampini /* count nz in the upper triangular part of A */ 554829b1710SHong Zhang nz = 0; 55529b521d4Sstefano_zampini if (missing) { 55629b521d4Sstefano_zampini for (i = 0; i < M; i++) { 55729b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 55829b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 55929b521d4Sstefano_zampini if (aj[j] < i) continue; 56029b521d4Sstefano_zampini nz++; 56129b521d4Sstefano_zampini } 56229b521d4Sstefano_zampini } else { 56329b521d4Sstefano_zampini nz += 
ai[i + 1] - adiag[i]; 56429b521d4Sstefano_zampini } 56529b521d4Sstefano_zampini } 56629b521d4Sstefano_zampini } else { 567829b1710SHong Zhang for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 56829b521d4Sstefano_zampini } 5699566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 5709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 571a6053eceSJunchao Zhang mumps->nnz = nz; 572a6053eceSJunchao Zhang mumps->irn = row; 573a6053eceSJunchao Zhang mumps->jcn = col; 574a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 575185f6596SHong Zhang 57616ebf90aSShri Abhyankar nz = 0; 57729b521d4Sstefano_zampini if (missing) { 57829b521d4Sstefano_zampini for (i = 0; i < M; i++) { 57929b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 58029b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 58129b521d4Sstefano_zampini if (aj[j] < i) continue; 5829566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 5839566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 58429b521d4Sstefano_zampini val[nz] = av[j]; 58529b521d4Sstefano_zampini nz++; 58629b521d4Sstefano_zampini } 58729b521d4Sstefano_zampini } else { 58829b521d4Sstefano_zampini rnz = ai[i + 1] - adiag[i]; 58929b521d4Sstefano_zampini ajj = aj + adiag[i]; 59029b521d4Sstefano_zampini v1 = av + adiag[i]; 59129b521d4Sstefano_zampini for (j = 0; j < rnz; j++) { 5929566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 5939566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 594a6053eceSJunchao Zhang val[nz++] = v1[j]; 59529b521d4Sstefano_zampini } 59629b521d4Sstefano_zampini } 59729b521d4Sstefano_zampini } 59829b521d4Sstefano_zampini } else { 59916ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 60016ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 60167877ebaSShri Abhyankar ajj = aj + adiag[i]; 602cf3759fdSShri Abhyankar v1 = av + adiag[i]; 
60367877ebaSShri Abhyankar for (j = 0; j < rnz; j++) { 6049566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 6059566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 606a6053eceSJunchao Zhang val[nz++] = v1[j]; 60716ebf90aSShri Abhyankar } 60816ebf90aSShri Abhyankar } 60929b521d4Sstefano_zampini } 610397b6df1SKris Buschelman } else { 611a6053eceSJunchao Zhang nz = 0; 612a6053eceSJunchao Zhang val = mumps->val; 61329b521d4Sstefano_zampini if (missing) { 61416ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 61529b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 61629b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 61729b521d4Sstefano_zampini if (aj[j] < i) continue; 61829b521d4Sstefano_zampini val[nz++] = av[j]; 61929b521d4Sstefano_zampini } 62029b521d4Sstefano_zampini } else { 62116ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 62267877ebaSShri Abhyankar v1 = av + adiag[i]; 623ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 62416ebf90aSShri Abhyankar } 62516ebf90aSShri Abhyankar } 62629b521d4Sstefano_zampini } else { 62716ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 62816ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 62916ebf90aSShri Abhyankar v1 = av + adiag[i]; 630ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 63116ebf90aSShri Abhyankar } 63216ebf90aSShri Abhyankar } 63329b521d4Sstefano_zampini } 6349566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 6353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 63616ebf90aSShri Abhyankar } 63716ebf90aSShri Abhyankar 638d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 639d71ae5a4SJacob Faibussowitsch { 640a6053eceSJunchao Zhang const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 641a6053eceSJunchao Zhang PetscInt bs; 642a6053eceSJunchao Zhang 
PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 643a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 64416ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 64516ebf90aSShri Abhyankar PetscScalar *val; 646397b6df1SKris Buschelman Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 647397b6df1SKris Buschelman Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 648397b6df1SKris Buschelman Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 649ec4f40fdSPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 65038548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 651b94d7dedSBarry Smith PetscBool hermitian, isset; 65238548759SBarry Smith #endif 65316ebf90aSShri Abhyankar 65416ebf90aSShri Abhyankar PetscFunctionBegin; 65538548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 656b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 657b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 65838548759SBarry Smith #endif 6599566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 66038548759SBarry Smith rstart = A->rmap->rstart; 66138548759SBarry Smith ai = aa->i; 66238548759SBarry Smith aj = aa->j; 66338548759SBarry Smith bi = bb->i; 66438548759SBarry Smith bj = bb->j; 66538548759SBarry Smith av = aa->a; 66638548759SBarry Smith bv = bb->a; 667397b6df1SKris Buschelman 6682205254eSKarl Rupp garray = mat->garray; 6692205254eSKarl Rupp 670bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 671a6053eceSJunchao Zhang nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 6729566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 6739566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 674a6053eceSJunchao Zhang /* can not decide the exact mumps->nnz now because of the SBAIJ */ 675a6053eceSJunchao Zhang mumps->irn = row; 676a6053eceSJunchao Zhang mumps->jcn = col; 
677a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 678397b6df1SKris Buschelman } else { 679a6053eceSJunchao Zhang val = mumps->val; 680397b6df1SKris Buschelman } 681397b6df1SKris Buschelman 6829371c9d4SSatish Balay jj = 0; 6839371c9d4SSatish Balay irow = rstart; 684ec4f40fdSPierre Jolivet for (i = 0; i < mbs; i++) { 685397b6df1SKris Buschelman ajj = aj + ai[i]; /* ptr to the beginning of this row */ 686397b6df1SKris Buschelman countA = ai[i + 1] - ai[i]; 687397b6df1SKris Buschelman countB = bi[i + 1] - bi[i]; 688397b6df1SKris Buschelman bjj = bj + bi[i]; 689ec4f40fdSPierre Jolivet v1 = av + ai[i] * bs2; 690ec4f40fdSPierre Jolivet v2 = bv + bi[i] * bs2; 691397b6df1SKris Buschelman 692ec4f40fdSPierre Jolivet if (bs > 1) { 693ec4f40fdSPierre Jolivet /* A-part */ 694ec4f40fdSPierre Jolivet for (j = 0; j < countA; j++) { 695ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 696ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 697ec4f40fdSPierre Jolivet if (rstart + ajj[j] * bs > irow || k >= m) { 698ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 6999566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 7009566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 701ec4f40fdSPierre Jolivet } 702ec4f40fdSPierre Jolivet val[jj++] = v1[j * bs2 + m + k * bs]; 703ec4f40fdSPierre Jolivet } 704ec4f40fdSPierre Jolivet } 705ec4f40fdSPierre Jolivet } 706ec4f40fdSPierre Jolivet } 707ec4f40fdSPierre Jolivet 708ec4f40fdSPierre Jolivet /* B-part */ 709ec4f40fdSPierre Jolivet for (j = 0; j < countB; j++) { 710ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 711ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 712ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 7139566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 7149566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 
715ec4f40fdSPierre Jolivet } 716ec4f40fdSPierre Jolivet val[jj++] = v2[j * bs2 + m + k * bs]; 717ec4f40fdSPierre Jolivet } 718ec4f40fdSPierre Jolivet } 719ec4f40fdSPierre Jolivet } 720ec4f40fdSPierre Jolivet } else { 721397b6df1SKris Buschelman /* A-part */ 722397b6df1SKris Buschelman for (j = 0; j < countA; j++) { 723bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7249566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 7259566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 726397b6df1SKris Buschelman } 72716ebf90aSShri Abhyankar val[jj++] = v1[j]; 728397b6df1SKris Buschelman } 72916ebf90aSShri Abhyankar 73016ebf90aSShri Abhyankar /* B-part */ 73116ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 732bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7339566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 7349566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 735397b6df1SKris Buschelman } 73616ebf90aSShri Abhyankar val[jj++] = v2[j]; 73716ebf90aSShri Abhyankar } 73816ebf90aSShri Abhyankar } 739ec4f40fdSPierre Jolivet irow += bs; 740ec4f40fdSPierre Jolivet } 7415d955bbbSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 7423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 74316ebf90aSShri Abhyankar } 74416ebf90aSShri Abhyankar 745d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 746d71ae5a4SJacob Faibussowitsch { 74716ebf90aSShri Abhyankar const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 7485d955bbbSStefano Zampini PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 749a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 75016ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 75116ebf90aSShri Abhyankar PetscScalar *val; 752a3d589ffSStefano Zampini 
Mat Ad, Ao; 753a3d589ffSStefano Zampini Mat_SeqAIJ *aa; 754a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 75516ebf90aSShri Abhyankar 75616ebf90aSShri Abhyankar PetscFunctionBegin; 7579566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 7589566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 7599566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 760a3d589ffSStefano Zampini 761a3d589ffSStefano Zampini aa = (Mat_SeqAIJ *)(Ad)->data; 762a3d589ffSStefano Zampini bb = (Mat_SeqAIJ *)(Ao)->data; 76338548759SBarry Smith ai = aa->i; 76438548759SBarry Smith aj = aa->j; 76538548759SBarry Smith bi = bb->i; 76638548759SBarry Smith bj = bb->j; 76716ebf90aSShri Abhyankar 768a3d589ffSStefano Zampini rstart = A->rmap->rstart; 7695d955bbbSStefano Zampini cstart = A->cmap->rstart; 7702205254eSKarl Rupp 771bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 772a6053eceSJunchao Zhang nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 7739566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 7749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 775a6053eceSJunchao Zhang mumps->nnz = nz; 776a6053eceSJunchao Zhang mumps->irn = row; 777a6053eceSJunchao Zhang mumps->jcn = col; 778a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 77916ebf90aSShri Abhyankar } else { 780a6053eceSJunchao Zhang val = mumps->val; 78116ebf90aSShri Abhyankar } 78216ebf90aSShri Abhyankar 7839371c9d4SSatish Balay jj = 0; 7849371c9d4SSatish Balay irow = rstart; 78516ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 78616ebf90aSShri Abhyankar ajj = aj + ai[i]; /* ptr to the beginning of this row */ 78716ebf90aSShri Abhyankar countA = ai[i + 1] - ai[i]; 78816ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 78916ebf90aSShri Abhyankar bjj = bj + bi[i]; 79016ebf90aSShri Abhyankar v1 = av + ai[i]; 79116ebf90aSShri Abhyankar v2 = bv + bi[i]; 79216ebf90aSShri Abhyankar 
79316ebf90aSShri Abhyankar /* A-part */ 79416ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 795bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7969566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 7975d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 79816ebf90aSShri Abhyankar } 79916ebf90aSShri Abhyankar val[jj++] = v1[j]; 80016ebf90aSShri Abhyankar } 80116ebf90aSShri Abhyankar 80216ebf90aSShri Abhyankar /* B-part */ 80316ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 804bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8059566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 8069566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 80716ebf90aSShri Abhyankar } 80816ebf90aSShri Abhyankar val[jj++] = v2[j]; 80916ebf90aSShri Abhyankar } 81016ebf90aSShri Abhyankar irow++; 81116ebf90aSShri Abhyankar } 8129566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 8139566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 8143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 81516ebf90aSShri Abhyankar } 81616ebf90aSShri Abhyankar 817d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 818d71ae5a4SJacob Faibussowitsch { 81967877ebaSShri Abhyankar Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 82067877ebaSShri Abhyankar Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 82167877ebaSShri Abhyankar Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 82267877ebaSShri Abhyankar const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 8235d955bbbSStefano Zampini const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 82433d57670SJed Brown const PetscInt bs2 = mat->bs2; 825a6053eceSJunchao Zhang PetscInt bs; 
826a6053eceSJunchao Zhang PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 827a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 82867877ebaSShri Abhyankar const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 82967877ebaSShri Abhyankar PetscScalar *val; 83067877ebaSShri Abhyankar 83167877ebaSShri Abhyankar PetscFunctionBegin; 8329566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 833bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 83467877ebaSShri Abhyankar nz = bs2 * (aa->nz + bb->nz); 8359566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 8369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 837a6053eceSJunchao Zhang mumps->nnz = nz; 838a6053eceSJunchao Zhang mumps->irn = row; 839a6053eceSJunchao Zhang mumps->jcn = col; 840a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 84167877ebaSShri Abhyankar } else { 842a6053eceSJunchao Zhang val = mumps->val; 84367877ebaSShri Abhyankar } 84467877ebaSShri Abhyankar 8459371c9d4SSatish Balay jj = 0; 8469371c9d4SSatish Balay irow = rstart; 84767877ebaSShri Abhyankar for (i = 0; i < mbs; i++) { 84867877ebaSShri Abhyankar countA = ai[i + 1] - ai[i]; 84967877ebaSShri Abhyankar countB = bi[i + 1] - bi[i]; 85067877ebaSShri Abhyankar ajj = aj + ai[i]; 85167877ebaSShri Abhyankar bjj = bj + bi[i]; 85267877ebaSShri Abhyankar v1 = av + bs2 * ai[i]; 85367877ebaSShri Abhyankar v2 = bv + bs2 * bi[i]; 85467877ebaSShri Abhyankar 85567877ebaSShri Abhyankar idx = 0; 85667877ebaSShri Abhyankar /* A-part */ 85767877ebaSShri Abhyankar for (k = 0; k < countA; k++) { 85867877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 85967877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 860bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8619566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 8625d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 86367877ebaSShri Abhyankar } 86467877ebaSShri 
Abhyankar val[jj++] = v1[idx++]; 86567877ebaSShri Abhyankar } 86667877ebaSShri Abhyankar } 86767877ebaSShri Abhyankar } 86867877ebaSShri Abhyankar 86967877ebaSShri Abhyankar idx = 0; 87067877ebaSShri Abhyankar /* B-part */ 87167877ebaSShri Abhyankar for (k = 0; k < countB; k++) { 87267877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 87367877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 874bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 8759566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 8769566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 87767877ebaSShri Abhyankar } 878d985c460SShri Abhyankar val[jj++] = v2[idx++]; 87967877ebaSShri Abhyankar } 88067877ebaSShri Abhyankar } 88167877ebaSShri Abhyankar } 882d985c460SShri Abhyankar irow += bs; 88367877ebaSShri Abhyankar } 8843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 88567877ebaSShri Abhyankar } 88667877ebaSShri Abhyankar 887d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 888d71ae5a4SJacob Faibussowitsch { 88916ebf90aSShri Abhyankar const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 890a6053eceSJunchao Zhang PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 891a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 89216ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 89316ebf90aSShri Abhyankar PetscScalar *val; 894a3d589ffSStefano Zampini Mat Ad, Ao; 895a3d589ffSStefano Zampini Mat_SeqAIJ *aa; 896a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 89738548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 898b94d7dedSBarry Smith PetscBool hermitian, isset; 89938548759SBarry Smith #endif 90016ebf90aSShri Abhyankar 90116ebf90aSShri Abhyankar PetscFunctionBegin; 90238548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 903b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, 
&isset, &hermitian)); 904b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 90538548759SBarry Smith #endif 9069566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 9079566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 9089566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 909a3d589ffSStefano Zampini 910a3d589ffSStefano Zampini aa = (Mat_SeqAIJ *)(Ad)->data; 911a3d589ffSStefano Zampini bb = (Mat_SeqAIJ *)(Ao)->data; 91238548759SBarry Smith ai = aa->i; 91338548759SBarry Smith aj = aa->j; 91438548759SBarry Smith adiag = aa->diag; 91538548759SBarry Smith bi = bb->i; 91638548759SBarry Smith bj = bb->j; 9172205254eSKarl Rupp 91816ebf90aSShri Abhyankar rstart = A->rmap->rstart; 91916ebf90aSShri Abhyankar 920bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 921e0bace9bSHong Zhang nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 922e0bace9bSHong Zhang nzb = 0; /* num of upper triangular entries in mat->B */ 92316ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 924e0bace9bSHong Zhang nza += (ai[i + 1] - adiag[i]); 92516ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 92616ebf90aSShri Abhyankar bjj = bj + bi[i]; 927e0bace9bSHong Zhang for (j = 0; j < countB; j++) { 928e0bace9bSHong Zhang if (garray[bjj[j]] > rstart) nzb++; 929e0bace9bSHong Zhang } 930e0bace9bSHong Zhang } 93116ebf90aSShri Abhyankar 932e0bace9bSHong Zhang nz = nza + nzb; /* total nz of upper triangular part of mat */ 9339566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 9349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 935a6053eceSJunchao Zhang mumps->nnz = nz; 936a6053eceSJunchao Zhang mumps->irn = row; 937a6053eceSJunchao Zhang mumps->jcn = col; 938a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 93916ebf90aSShri Abhyankar } else 
{ 940a6053eceSJunchao Zhang val = mumps->val; 94116ebf90aSShri Abhyankar } 94216ebf90aSShri Abhyankar 9439371c9d4SSatish Balay jj = 0; 9449371c9d4SSatish Balay irow = rstart; 94516ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 94616ebf90aSShri Abhyankar ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 94716ebf90aSShri Abhyankar v1 = av + adiag[i]; 94816ebf90aSShri Abhyankar countA = ai[i + 1] - adiag[i]; 94916ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 95016ebf90aSShri Abhyankar bjj = bj + bi[i]; 95116ebf90aSShri Abhyankar v2 = bv + bi[i]; 95216ebf90aSShri Abhyankar 95316ebf90aSShri Abhyankar /* A-part */ 95416ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 955bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 9569566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 9579566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 95816ebf90aSShri Abhyankar } 95916ebf90aSShri Abhyankar val[jj++] = v1[j]; 96016ebf90aSShri Abhyankar } 96116ebf90aSShri Abhyankar 96216ebf90aSShri Abhyankar /* B-part */ 96316ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 96416ebf90aSShri Abhyankar if (garray[bjj[j]] > rstart) { 965bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 9669566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 9679566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 96816ebf90aSShri Abhyankar } 96916ebf90aSShri Abhyankar val[jj++] = v2[j]; 97016ebf90aSShri Abhyankar } 971397b6df1SKris Buschelman } 972397b6df1SKris Buschelman irow++; 973397b6df1SKris Buschelman } 9749566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 9759566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 9763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 977397b6df1SKris Buschelman } 978397b6df1SKris Buschelman 9799d0448ceSStefano Zampini 
PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 9809d0448ceSStefano Zampini { 9819d0448ceSStefano Zampini Mat **mats; 9829d0448ceSStefano Zampini PetscInt nr, nc; 9839d0448ceSStefano Zampini PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 9849d0448ceSStefano Zampini 9859d0448ceSStefano Zampini PetscFunctionBegin; 9869d0448ceSStefano Zampini PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 9879d0448ceSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 9889d0448ceSStefano Zampini PetscMUMPSInt *irns, *jcns; 9899d0448ceSStefano Zampini PetscScalar *vals; 9909d0448ceSStefano Zampini PetscInt64 totnnz, cumnnz, maxnnz; 9919d0448ceSStefano Zampini PetscInt *pjcns_w; 9929d0448ceSStefano Zampini IS *rows, *cols; 9939d0448ceSStefano Zampini PetscInt **rows_idx, **cols_idx; 9949d0448ceSStefano Zampini 9959d0448ceSStefano Zampini cumnnz = 0; 9969d0448ceSStefano Zampini maxnnz = 0; 9975d955bbbSStefano Zampini PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 9989d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 9999d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 10009d0448ceSStefano Zampini Mat sub = mats[r][c]; 10019d0448ceSStefano Zampini 10029d0448ceSStefano Zampini mumps->nest_convert_to_triples[r * nc + c] = NULL; 10039d0448ceSStefano Zampini if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 10049d0448ceSStefano Zampini if (sub) { 10059d0448ceSStefano Zampini PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 10065d955bbbSStefano Zampini PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE; 10079d0448ceSStefano Zampini MatInfo info; 10089d0448ceSStefano Zampini 10095d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 10105d955bbbSStefano Zampini if (isTrans) 
PetscCall(MatTransposeGetMat(sub, &sub)); 10115d955bbbSStefano Zampini else { 10125d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 10135d955bbbSStefano Zampini if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 10145d955bbbSStefano Zampini } 10159d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 10169d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 10179d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 10189d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 10199d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 10209d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 10219d0448ceSStefano Zampini 10229d0448ceSStefano Zampini if (chol) { 10239d0448ceSStefano Zampini if (r == c) { 10249d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 10259d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 10269d0448ceSStefano Zampini else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 10279d0448ceSStefano Zampini else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 10289d0448ceSStefano Zampini } else { 10299d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 10309d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 10319d0448ceSStefano Zampini else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 10329d0448ceSStefano Zampini else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 10339d0448ceSStefano 
Zampini } 10349d0448ceSStefano Zampini } else { 10359d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 10369d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 10379d0448ceSStefano Zampini else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 10389d0448ceSStefano Zampini else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 10399d0448ceSStefano Zampini } 10409d0448ceSStefano Zampini PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 10419d0448ceSStefano Zampini mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 10429d0448ceSStefano Zampini PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 10439d0448ceSStefano Zampini cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 10449d0448ceSStefano Zampini maxnnz = PetscMax(maxnnz, info.nz_used); 10459d0448ceSStefano Zampini } 10469d0448ceSStefano Zampini } 10479d0448ceSStefano Zampini } 10489d0448ceSStefano Zampini 10499d0448ceSStefano Zampini /* Allocate total COO */ 10509d0448ceSStefano Zampini totnnz = cumnnz; 10519d0448ceSStefano Zampini PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 10529d0448ceSStefano Zampini PetscCall(PetscMalloc1(totnnz, &vals)); 10539d0448ceSStefano Zampini 10549d0448ceSStefano Zampini /* Handle rows and column maps 10559d0448ceSStefano Zampini We directly map rows and use an SF for the columns */ 10569d0448ceSStefano Zampini PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 10579d0448ceSStefano Zampini PetscCall(MatNestGetISs(A, rows, cols)); 10589d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 10599d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 
10609d0448ceSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 10615d955bbbSStefano Zampini else (void)maxnnz; 10629d0448ceSStefano Zampini 10639d0448ceSStefano Zampini cumnnz = 0; 10649d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 10659d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 10669d0448ceSStefano Zampini Mat sub = mats[r][c]; 10679d0448ceSStefano Zampini const PetscInt *ridx = rows_idx[r]; 10685d955bbbSStefano Zampini const PetscInt *cidx = cols_idx[c]; 10699d0448ceSStefano Zampini PetscInt rst; 10709d0448ceSStefano Zampini PetscSF csf; 10715d955bbbSStefano Zampini PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 10725d955bbbSStefano Zampini PetscLayout cmap; 10739d0448ceSStefano Zampini 10749d0448ceSStefano Zampini mumps->nest_vals_start[r * nc + c] = cumnnz; 10759d0448ceSStefano Zampini if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 10769d0448ceSStefano Zampini 10775d955bbbSStefano Zampini /* Extract inner blocks if needed */ 10785d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 10795d955bbbSStefano Zampini if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 10805d955bbbSStefano Zampini else { 10815d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 10825d955bbbSStefano Zampini if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 10835d955bbbSStefano Zampini } 10845d955bbbSStefano Zampini swap = (PetscBool)(isTrans || isHTrans); 10855d955bbbSStefano Zampini 10865d955bbbSStefano Zampini /* Get column layout to map off-process columns */ 10875d955bbbSStefano Zampini PetscCall(MatGetLayouts(sub, NULL, &cmap)); 10885d955bbbSStefano Zampini 10895d955bbbSStefano Zampini /* Get row start to map on-process rows */ 10905d955bbbSStefano Zampini PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 10915d955bbbSStefano Zampini 10929d0448ceSStefano 
Zampini /* Directly use the mumps datastructure and use C ordering for now */ 10939d0448ceSStefano Zampini PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 10949d0448ceSStefano Zampini 10955d955bbbSStefano Zampini /* Swap the role of rows and columns indices for transposed blocks 10965d955bbbSStefano Zampini since we need values with global final ordering */ 10975d955bbbSStefano Zampini if (swap) { 10985d955bbbSStefano Zampini cidx = rows_idx[r]; 10995d955bbbSStefano Zampini ridx = cols_idx[c]; 11009d0448ceSStefano Zampini } 11019d0448ceSStefano Zampini 11025d955bbbSStefano Zampini /* Communicate column indices 11035d955bbbSStefano Zampini This could have been done with a single SF but it would have complicated the code a lot. 11045d955bbbSStefano Zampini But since we do it only once, we pay the price of setting up an SF for each block */ 11055d955bbbSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) { 11065d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 11075d955bbbSStefano Zampini } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 11089d0448ceSStefano Zampini PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 11095d955bbbSStefano Zampini PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 11105d955bbbSStefano Zampini PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 11115d955bbbSStefano Zampini PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 11129d0448ceSStefano Zampini PetscCall(PetscSFDestroy(&csf)); 11139d0448ceSStefano Zampini 11145d955bbbSStefano Zampini /* Import indices: use direct map for rows and mapped indices for columns */ 11155d955bbbSStefano Zampini if (swap) { 11165d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) { 11175d955bbbSStefano Zampini 
PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 11185d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 11195d955bbbSStefano Zampini } 11205d955bbbSStefano Zampini } else { 11215d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) { 11225d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 11235d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 11245d955bbbSStefano Zampini } 11255d955bbbSStefano Zampini } 11265d955bbbSStefano Zampini 11275d955bbbSStefano Zampini /* Import values to full COO */ 11285d955bbbSStefano Zampini if (isHTrans) { /* conjugate the entries */ 11295d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 11305d955bbbSStefano Zampini } 11315d955bbbSStefano Zampini PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 11329d0448ceSStefano Zampini 11339d0448ceSStefano Zampini /* Shift new starting point and sanity check */ 11349d0448ceSStefano Zampini cumnnz += mumps->nnz; 11359d0448ceSStefano Zampini PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 11369d0448ceSStefano Zampini 11379d0448ceSStefano Zampini /* Free scratch memory */ 11389d0448ceSStefano Zampini PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 11399d0448ceSStefano Zampini PetscCall(PetscFree(mumps->val_alloc)); 11409d0448ceSStefano Zampini mumps->val = NULL; 11419d0448ceSStefano Zampini mumps->nnz = 0; 11429d0448ceSStefano Zampini } 11439d0448ceSStefano Zampini } 11449d0448ceSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 11459d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 11469d0448ceSStefano Zampini for (PetscInt c = 
0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 11479d0448ceSStefano Zampini PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 11489d0448ceSStefano Zampini if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 11495d955bbbSStefano Zampini mumps->nest_vals_start[nr * nc] = cumnnz; 11509d0448ceSStefano Zampini 11519d0448ceSStefano Zampini /* Set pointers for final MUMPS data structure */ 11529d0448ceSStefano Zampini mumps->nest_vals = vals; 11539d0448ceSStefano Zampini mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 11549d0448ceSStefano Zampini mumps->val = vals; 11559d0448ceSStefano Zampini mumps->irn = irns; 11569d0448ceSStefano Zampini mumps->jcn = jcns; 11579d0448ceSStefano Zampini mumps->nnz = cumnnz; 11589d0448ceSStefano Zampini } else { 11599d0448ceSStefano Zampini PetscScalar *oval = mumps->nest_vals; 11609d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 11619d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 11625d955bbbSStefano Zampini PetscBool isTrans, isHTrans = PETSC_FALSE; 11635d955bbbSStefano Zampini Mat sub = mats[r][c]; 11645d955bbbSStefano Zampini PetscInt midx = r * nc + c; 11655d955bbbSStefano Zampini 11665d955bbbSStefano Zampini if (!mumps->nest_convert_to_triples[midx]) continue; 11675d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 11685d955bbbSStefano Zampini if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 11695d955bbbSStefano Zampini else { 11705d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 11715d955bbbSStefano Zampini if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 11725d955bbbSStefano Zampini } 11735d955bbbSStefano Zampini mumps->val = oval + 
mumps->nest_vals_start[midx]; 11745d955bbbSStefano Zampini PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 11755d955bbbSStefano Zampini if (isHTrans) { 11765d955bbbSStefano Zampini PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 11775d955bbbSStefano Zampini for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 11785d955bbbSStefano Zampini } 11799d0448ceSStefano Zampini } 11809d0448ceSStefano Zampini } 11819d0448ceSStefano Zampini mumps->val = oval; 11829d0448ceSStefano Zampini } 11839d0448ceSStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 11849d0448ceSStefano Zampini } 11859d0448ceSStefano Zampini 1186d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_MUMPS(Mat A) 1187d71ae5a4SJacob Faibussowitsch { 1188a6053eceSJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1189b24902e0SBarry Smith 1190397b6df1SKris Buschelman PetscFunctionBegin; 11919566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 11929566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 11939566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 11949566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->b_seq)); 11959566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 11969566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.perm_in)); 11979566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 11989566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->val_alloc)); 11999566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->info)); 1200413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 1201413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 12029566063dSJacob Faibussowitsch PetscCall(MatMumpsResetSchur_Private(mumps)); 1203413bcc21SPierre Jolivet if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called 
for this instance */ 1204a5e57a09SHong Zhang mumps->id.job = JOB_END; 12053ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 120608401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 1207413bcc21SPierre Jolivet if (mumps->mumps_comm != MPI_COMM_NULL) { 1208413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1209413bcc21SPierre Jolivet else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1210413bcc21SPierre Jolivet } 1211413bcc21SPierre Jolivet } 12123ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 121367602552SJunchao Zhang if (mumps->use_petsc_omp_support) { 12149566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 12159566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 12169566063dSJacob Faibussowitsch PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 121767602552SJunchao Zhang } 12183ab56b82SJunchao Zhang #endif 12199566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 12209566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 12219566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->recvcount)); 12229566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->reqs)); 12239566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->irhs_loc)); 12249d0448ceSStefano Zampini PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 12259d0448ceSStefano Zampini PetscCall(PetscFree(mumps->nest_vals)); 12269566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1227bf0cc555SLisandro Dalcin 122897969023SHong Zhang /* clear composed functions */ 12299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 
12309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 12319566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 12329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 12339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 12349566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 12359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 12369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 12379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 12389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 12399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 12405c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 12419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 12429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 12433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1244397b6df1SKris Buschelman } 1245397b6df1SKris Buschelman 124667602552SJunchao Zhang /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
*/ 1247d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1248d71ae5a4SJacob Faibussowitsch { 124967602552SJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 125067602552SJunchao Zhang const PetscMPIInt ompsize = mumps->omp_comm_size; 125167602552SJunchao Zhang PetscInt i, m, M, rstart; 125267602552SJunchao Zhang 125367602552SJunchao Zhang PetscFunctionBegin; 12549566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &M, NULL)); 12559566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &m, NULL)); 125608401ef6SPierre Jolivet PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 125767602552SJunchao Zhang if (ompsize == 1) { 125867602552SJunchao Zhang if (!mumps->irhs_loc) { 125967602552SJunchao Zhang mumps->nloc_rhs = m; 12609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 12619566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 126267602552SJunchao Zhang for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 126367602552SJunchao Zhang } 126467602552SJunchao Zhang mumps->id.rhs_loc = (MumpsScalar *)array; 126567602552SJunchao Zhang } else { 126667602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 126767602552SJunchao Zhang const PetscInt *ranges; 126867602552SJunchao Zhang PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 126967602552SJunchao Zhang MPI_Group petsc_group, omp_group; 127067602552SJunchao Zhang PetscScalar *recvbuf = NULL; 127167602552SJunchao Zhang 127267602552SJunchao Zhang if (mumps->is_omp_master) { 127367602552SJunchao Zhang /* Lazily initialize the omp stuff for distributed rhs */ 127467602552SJunchao Zhang if (!mumps->irhs_loc) { 12759566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 12769566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(ompsize, 
&mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 12779566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 12789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 127967602552SJunchao Zhang for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 12809566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 128167602552SJunchao Zhang 128267602552SJunchao Zhang /* Populate mumps->irhs_loc[], rhs_nrow[] */ 128367602552SJunchao Zhang mumps->nloc_rhs = 0; 12849566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(A, &ranges)); 128567602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 128667602552SJunchao Zhang mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 128767602552SJunchao Zhang mumps->nloc_rhs += mumps->rhs_nrow[j]; 128867602552SJunchao Zhang } 12899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 129067602552SJunchao Zhang for (j = k = 0; j < ompsize; j++) { 129167602552SJunchao Zhang for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 129267602552SJunchao Zhang } 129367602552SJunchao Zhang 12949566063dSJacob Faibussowitsch PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 12959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&petsc_group)); 12969566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&omp_group)); 129767602552SJunchao Zhang } 129867602552SJunchao Zhang 129967602552SJunchao Zhang /* Realloc buffers when current nrhs is bigger than what we have met */ 130067602552SJunchao Zhang if (nrhs > mumps->max_nrhs) { 13019566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 13029566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, 
&mumps->rhs_recvbuf)); 130367602552SJunchao Zhang mumps->max_nrhs = nrhs; 130467602552SJunchao Zhang } 130567602552SJunchao Zhang 130667602552SJunchao Zhang /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 13079566063dSJacob Faibussowitsch for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 130867602552SJunchao Zhang mumps->rhs_disps[0] = 0; 130967602552SJunchao Zhang for (j = 1; j < ompsize; j++) { 131067602552SJunchao Zhang mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 131108401ef6SPierre Jolivet PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 131267602552SJunchao Zhang } 131367602552SJunchao Zhang recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 131467602552SJunchao Zhang } 131567602552SJunchao Zhang 13169566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 13179566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 131867602552SJunchao Zhang 131967602552SJunchao Zhang if (mumps->is_omp_master) { 132067602552SJunchao Zhang if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 132167602552SJunchao Zhang PetscScalar *dst, *dstbase = mumps->rhs_loc; 132267602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 132367602552SJunchao Zhang const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 132467602552SJunchao Zhang dst = dstbase; 132567602552SJunchao Zhang for (i = 0; i < nrhs; i++) { 13269566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 132767602552SJunchao Zhang src += mumps->rhs_nrow[j]; 132867602552SJunchao Zhang dst += mumps->nloc_rhs; 
132967602552SJunchao Zhang } 133067602552SJunchao Zhang dstbase += mumps->rhs_nrow[j]; 133167602552SJunchao Zhang } 133267602552SJunchao Zhang } 133367602552SJunchao Zhang mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 133467602552SJunchao Zhang } 133567602552SJunchao Zhang #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 133667602552SJunchao Zhang } 133767602552SJunchao Zhang mumps->id.nrhs = nrhs; 133867602552SJunchao Zhang mumps->id.nloc_rhs = mumps->nloc_rhs; 133967602552SJunchao Zhang mumps->id.lrhs_loc = mumps->nloc_rhs; 134067602552SJunchao Zhang mumps->id.irhs_loc = mumps->irhs_loc; 13413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 134267602552SJunchao Zhang } 134367602552SJunchao Zhang 1344d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1345d71ae5a4SJacob Faibussowitsch { 1346e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 134725aac85cSJunchao Zhang const PetscScalar *rarray = NULL; 1348d54de34fSKris Buschelman PetscScalar *array; 1349329ec9b3SHong Zhang IS is_iden, is_petsc; 1350329ec9b3SHong Zhang PetscInt i; 1351cc86f929SStefano Zampini PetscBool second_solve = PETSC_FALSE; 1352883f2eb9SBarry Smith static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1353397b6df1SKris Buschelman 1354397b6df1SKris Buschelman PetscFunctionBegin; 13559371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 13569371c9d4SSatish Balay "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 13579371c9d4SSatish Balay &cite1)); 13589371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 13599371c9d4SSatish Balay "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 13609371c9d4SSatish Balay &cite2)); 13612aca8efcSHong Zhang 1362603e8f96SBarry Smith if (A->factorerrortype) { 13639566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 13649566063dSJacob Faibussowitsch PetscCall(VecSetInf(x)); 13653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13662aca8efcSHong Zhang } 13672aca8efcSHong Zhang 1368a5e57a09SHong Zhang mumps->id.nrhs = 1; 13692d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 137025aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 137167602552SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 13729566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(b, &rarray)); 13739566063dSJacob Faibussowitsch PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 137425aac85cSJunchao Zhang } else { 137541ffd417SStefano Zampini mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 13769566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 13779566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 137867602552SJunchao Zhang if (!mumps->myid) { 13799566063dSJacob Faibussowitsch PetscCall(VecGetArray(mumps->b_seq, &array)); 138067602552SJunchao Zhang mumps->id.rhs = (MumpsScalar *)array; 138167602552SJunchao Zhang } 138225aac85cSJunchao Zhang } 13833ab56b82SJunchao Zhang } else { /* petsc_size == 1 */ 138467602552SJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 13859566063dSJacob Faibussowitsch PetscCall(VecCopy(b, x)); 13869566063dSJacob Faibussowitsch 
PetscCall(VecGetArray(x, &array)); 1387940cd9d6SSatish Balay mumps->id.rhs = (MumpsScalar *)array; 1388397b6df1SKris Buschelman } 1389397b6df1SKris Buschelman 1390cc86f929SStefano Zampini /* 1391cc86f929SStefano Zampini handle condensation step of Schur complement (if any) 1392cc86f929SStefano Zampini We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1393cc86f929SStefano Zampini According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1394cc86f929SStefano Zampini Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1395cc86f929SStefano Zampini This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1396cc86f929SStefano Zampini */ 1397583f777eSStefano Zampini if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 139808401ef6SPierre Jolivet PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1399cc86f929SStefano Zampini second_solve = PETSC_TRUE; 14009566063dSJacob Faibussowitsch PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1401cc86f929SStefano Zampini } 1402397b6df1SKris Buschelman /* solve phase */ 1403a5e57a09SHong Zhang mumps->id.job = JOB_SOLVE; 14043ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 140508401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1406397b6df1SKris Buschelman 1407b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 14081baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1409b5fa320bSStefano Zampini 14102d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1411a5e57a09SHong Zhang if (mumps->scat_sol && mumps->ICNTL9_pre 
!= mumps->id.ICNTL(9)) { 1412a5e57a09SHong Zhang /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 14139566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1414397b6df1SKris Buschelman } 1415a5e57a09SHong Zhang if (!mumps->scat_sol) { /* create scatter scat_sol */ 1416a6053eceSJunchao Zhang PetscInt *isol2_loc = NULL; 14179566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 14189566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1419a6053eceSJunchao Zhang for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 14209566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 14219566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 14229566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_iden)); 14239566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_petsc)); 1424a5e57a09SHong Zhang mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1425397b6df1SKris Buschelman } 1426a5e57a09SHong Zhang 14279566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 14289566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1429329ec9b3SHong Zhang } 1430353d7d71SJunchao Zhang 143167602552SJunchao Zhang if (mumps->petsc_size > 1) { 143225aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 14339566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(b, &rarray)); 143425aac85cSJunchao Zhang } else if (!mumps->myid) { 14359566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mumps->b_seq, &array)); 143625aac85cSJunchao Zhang } 
14379566063dSJacob Faibussowitsch } else PetscCall(VecRestoreArray(x, &array)); 1438353d7d71SJunchao Zhang 14399566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3))); 14403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1441397b6df1SKris Buschelman } 1442397b6df1SKris Buschelman 1443d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1444d71ae5a4SJacob Faibussowitsch { 1445e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 144651d5961aSHong Zhang 144751d5961aSHong Zhang PetscFunctionBegin; 1448a5e57a09SHong Zhang mumps->id.ICNTL(9) = 0; 14499566063dSJacob Faibussowitsch PetscCall(MatSolve_MUMPS(A, b, x)); 1450a5e57a09SHong Zhang mumps->id.ICNTL(9) = 1; 14513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 145251d5961aSHong Zhang } 145351d5961aSHong Zhang 1454d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1455d71ae5a4SJacob Faibussowitsch { 1456b8491c3eSStefano Zampini Mat Bt = NULL; 1457a6053eceSJunchao Zhang PetscBool denseX, denseB, flg, flgT; 1458e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1459334c5f61SHong Zhang PetscInt i, nrhs, M; 14601683a169SBarry Smith PetscScalar *array; 14611683a169SBarry Smith const PetscScalar *rbray; 1462a6053eceSJunchao Zhang PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1463a6053eceSJunchao Zhang PetscMUMPSInt *isol_loc, *isol_loc_save; 14641683a169SBarry Smith PetscScalar *bray, *sol_loc, *sol_loc_save; 1465be818407SHong Zhang IS is_to, is_from; 1466beae5ec0SHong Zhang PetscInt k, proc, j, m, myrstart; 1467be818407SHong Zhang const PetscInt *rstart; 146867602552SJunchao Zhang Vec v_mpi, msol_loc; 146967602552SJunchao Zhang VecScatter scat_sol; 147067602552SJunchao Zhang Vec b_seq; 147167602552SJunchao Zhang VecScatter scat_rhs; 1472be818407SHong Zhang PetscScalar *aa; 1473be818407SHong Zhang PetscInt spnr, *ia, *ja; 1474d56c302dSHong Zhang Mat_MPIAIJ *b = NULL; 
1475bda8bf91SBarry Smith 1476e0b74bf9SHong Zhang PetscFunctionBegin; 14779566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 147828b400f6SJacob Faibussowitsch PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1479be818407SHong Zhang 14809566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1481a6053eceSJunchao Zhang if (denseB) { 148208401ef6SPierre Jolivet PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1483be818407SHong Zhang mumps->id.ICNTL(20) = 0; /* dense RHS */ 14840e6b8875SHong Zhang } else { /* sparse B */ 148508401ef6SPierre Jolivet PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1486013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1487da81f932SPierre Jolivet if (flgT) { /* input B is transpose of actual RHS matrix, 14880e6b8875SHong Zhang because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 14899566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(B, &Bt)); 1490013e2dc7SBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1491be818407SHong Zhang mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1492b8491c3eSStefano Zampini } 149387b22cf4SHong Zhang 14949566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &M, &nrhs)); 14959481e6e9SHong Zhang mumps->id.nrhs = nrhs; 14969481e6e9SHong Zhang mumps->id.lrhs = M; 14972b691707SHong Zhang mumps->id.rhs = NULL; 14989481e6e9SHong Zhang 14992d4298aeSJunchao Zhang if (mumps->petsc_size == 1) { 1500b8491c3eSStefano Zampini PetscScalar *aa; 1501b8491c3eSStefano Zampini PetscInt spnr, *ia, *ja; 1502e94cce23SStefano Zampini PetscBool second_solve = PETSC_FALSE; 1503b8491c3eSStefano Zampini 15049566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 1505b8491c3eSStefano Zampini mumps->id.rhs = (MumpsScalar *)array; 15062b691707SHong Zhang 1507a6053eceSJunchao Zhang if (denseB) { 15082b691707SHong Zhang /* copy B to X */ 15099566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &rbray)); 15109566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 15119566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 15122b691707SHong Zhang } else { /* sparse B */ 15139566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Bt, &aa)); 15149566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 151528b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 15169566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1517b8491c3eSStefano Zampini mumps->id.rhs_sparse = (MumpsScalar *)aa; 1518b8491c3eSStefano Zampini } 
1519e94cce23SStefano Zampini /* handle condensation step of Schur complement (if any) */ 1520583f777eSStefano Zampini if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1521e94cce23SStefano Zampini second_solve = PETSC_TRUE; 15229566063dSJacob Faibussowitsch PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1523e94cce23SStefano Zampini } 15242cd7d884SHong Zhang /* solve phase */ 15252cd7d884SHong Zhang mumps->id.job = JOB_SOLVE; 15263ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 152708401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1528b5fa320bSStefano Zampini 1529b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 15301baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1531a6053eceSJunchao Zhang if (!denseB) { /* sparse B */ 15329566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 15339566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 153428b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1535b8491c3eSStefano Zampini } 15369566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 15373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1538be818407SHong Zhang } 1539801fbe65SHong Zhang 15402ef1f0ffSBarry Smith /* parallel case: MUMPS requires rhs B to be centralized on the host! 
*/ 1541aed4548fSBarry Smith PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1542241dbb5eSStefano Zampini 1543beae5ec0SHong Zhang /* create msol_loc to hold mumps local solution */ 15441683a169SBarry Smith isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 15451683a169SBarry Smith sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1546801fbe65SHong Zhang 1547a1dfcbd9SJunchao Zhang lsol_loc = mumps->id.lsol_loc; 154871aed81dSHong Zhang nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 15499566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1550940cd9d6SSatish Balay mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1551801fbe65SHong Zhang mumps->id.isol_loc = isol_loc; 1552801fbe65SHong Zhang 15539566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 15542cd7d884SHong Zhang 155567602552SJunchao Zhang if (denseB) { 155625aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 155767602552SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 15589566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &rbray)); 15599566063dSJacob Faibussowitsch PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 15609566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 15619566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 15629566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 156325aac85cSJunchao Zhang } else { 156425aac85cSJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 156580577c12SJunchao Zhang /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 156680577c12SJunchao Zhang very inefficient communication. 
An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 156780577c12SJunchao Zhang 0, re-arrange B into desired order, which is a local operation. 156880577c12SJunchao Zhang */ 156980577c12SJunchao Zhang 157067602552SJunchao Zhang /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1571be818407SHong Zhang /* wrap dense rhs matrix B into a vector v_mpi */ 15729566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 15739566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(B, &bray)); 15749566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 15759566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(B, &bray)); 15762b691707SHong Zhang 1577be818407SHong Zhang /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1578801fbe65SHong Zhang if (!mumps->myid) { 1579beae5ec0SHong Zhang PetscInt *idx; 1580beae5ec0SHong Zhang /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 15819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrhs * M, &idx)); 15829566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(B, &rstart)); 1583be818407SHong Zhang k = 0; 15842d4298aeSJunchao Zhang for (proc = 0; proc < mumps->petsc_size; proc++) { 1585be818407SHong Zhang for (j = 0; j < nrhs; j++) { 1586beae5ec0SHong Zhang for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1587be818407SHong Zhang } 1588be818407SHong Zhang } 1589be818407SHong Zhang 15909566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 15919566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 15929566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1593801fbe65SHong Zhang } else { 15949566063dSJacob Faibussowitsch 
PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 15959566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 15969566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1597801fbe65SHong Zhang } 15989566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 15999566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 16009566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 16019566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 16029566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1603801fbe65SHong Zhang 1604801fbe65SHong Zhang if (!mumps->myid) { /* define rhs on the host */ 16059566063dSJacob Faibussowitsch PetscCall(VecGetArray(b_seq, &bray)); 1606940cd9d6SSatish Balay mumps->id.rhs = (MumpsScalar *)bray; 16079566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b_seq, &bray)); 1608801fbe65SHong Zhang } 160925aac85cSJunchao Zhang } 16102b691707SHong Zhang } else { /* sparse B */ 16112b691707SHong Zhang b = (Mat_MPIAIJ *)Bt->data; 16122b691707SHong Zhang 1613be818407SHong Zhang /* wrap dense X into a vector v_mpi */ 16149566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(X, &m, NULL)); 16159566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &bray)); 16169566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 16179566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &bray)); 16182b691707SHong Zhang 16192b691707SHong Zhang if (!mumps->myid) { 16209566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(b->A, &aa)); 16219566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 162228b400f6SJacob 
Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 16239566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 16242b691707SHong Zhang mumps->id.rhs_sparse = (MumpsScalar *)aa; 16252b691707SHong Zhang } else { 16262b691707SHong Zhang mumps->id.irhs_ptr = NULL; 16272b691707SHong Zhang mumps->id.irhs_sparse = NULL; 16282b691707SHong Zhang mumps->id.nz_rhs = 0; 16292b691707SHong Zhang mumps->id.rhs_sparse = NULL; 16302b691707SHong Zhang } 16312b691707SHong Zhang } 16322b691707SHong Zhang 1633801fbe65SHong Zhang /* solve phase */ 1634801fbe65SHong Zhang mumps->id.job = JOB_SOLVE; 16353ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 163608401ef6SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1637801fbe65SHong Zhang 1638334c5f61SHong Zhang /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 16399566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 16409566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(v_mpi, array)); 1641801fbe65SHong Zhang 1642334c5f61SHong Zhang /* create scatter scat_sol */ 16439566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(X, &rstart)); 1644beae5ec0SHong Zhang /* iidx: index for scatter mumps solution to petsc X */ 1645beae5ec0SHong Zhang 16469566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 16479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1648beae5ec0SHong Zhang for (i = 0; i < lsol_loc; i++) { 1649beae5ec0SHong Zhang isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1650beae5ec0SHong Zhang 16512d4298aeSJunchao Zhang for (proc = 0; proc < mumps->petsc_size; proc++) { 1652beae5ec0SHong Zhang if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1653beae5ec0SHong Zhang myrstart = rstart[proc]; 1654beae5ec0SHong Zhang k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1655beae5ec0SHong Zhang iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1656beae5ec0SHong Zhang m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1657beae5ec0SHong Zhang break; 1658be818407SHong Zhang } 1659be818407SHong Zhang } 1660be818407SHong Zhang 1661beae5ec0SHong Zhang for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1662801fbe65SHong Zhang } 16639566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 16649566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 16659566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 16669566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 16679566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 16689566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 16699566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 167071aed81dSHong Zhang 167171aed81dSHong Zhang /* free spaces */ 16721683a169SBarry Smith mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 167371aed81dSHong Zhang mumps->id.isol_loc = isol_loc_save; 167471aed81dSHong Zhang 16759566063dSJacob Faibussowitsch PetscCall(PetscFree2(sol_loc, isol_loc)); 16769566063dSJacob Faibussowitsch PetscCall(PetscFree(idxx)); 16779566063dSJacob Faibussowitsch PetscCall(VecDestroy(&msol_loc)); 16789566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v_mpi)); 
1679a6053eceSJunchao Zhang if (!denseB) { 16802b691707SHong Zhang if (!mumps->myid) { 1681d56c302dSHong Zhang b = (Mat_MPIAIJ *)Bt->data; 16829566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 16839566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 168428b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 16852b691707SHong Zhang } 16862b691707SHong Zhang } else { 168725aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { 16889566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b_seq)); 16899566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_rhs)); 169025aac85cSJunchao Zhang } 16912b691707SHong Zhang } 16929566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_sol)); 16939566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3))); 16943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1695e0b74bf9SHong Zhang } 1696e0b74bf9SHong Zhang 1697d71ae5a4SJacob Faibussowitsch /* MatMatSolveTranspose_MUMPS - runs MatMatSolve_MUMPS(A, B, X) with ICNTL(9) temporarily set to 0 and then puts the previous ICNTL(9) value back. In MUMPS a value of ICNTL(9) other than 1 selects the transposed system (see the MUMPS Users' Guide). NOTE(review): if MatMatSolve_MUMPS() fails, PetscCall returns before ICNTL(9) is restored - confirm that this is acceptable. */ PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1698d71ae5a4SJacob Faibussowitsch { 1699b18964edSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1700b18964edSHong Zhang PetscMUMPSInt oldvalue = mumps->id.ICNTL(9); 1701b18964edSHong Zhang 1702b18964edSHong Zhang PetscFunctionBegin; 1703b18964edSHong Zhang mumps->id.ICNTL(9) = 0; 1704b18964edSHong Zhang PetscCall(MatMatSolve_MUMPS(A, B, X)); 1705b18964edSHong Zhang mumps->id.ICNTL(9) = oldvalue; 17063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1707b18964edSHong Zhang } 1708b18964edSHong Zhang 1709d71ae5a4SJacob Faibussowitsch /* MatMatTransposeSolve_MUMPS - solve that takes the right-hand side as its transpose Bt: the statements that follow check that Bt is a SEQAIJ or MPIAIJ matrix, wrap it with MatCreateTranspose() and pass the wrapper to MatMatSolve_MUMPS(). */ PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1710d71ae5a4SJacob Faibussowitsch { 1711eb3ef3b2SHong Zhang PetscBool flg; 1712eb3ef3b2SHong Zhang Mat B; 1713eb3ef3b2SHong Zhang 1714eb3ef3b2SHong Zhang PetscFunctionBegin; 
17159566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 171628b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1717eb3ef3b2SHong Zhang 1718eb3ef3b2SHong Zhang /* Create B=Bt^T that uses Bt's data structure */ 17199566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(Bt, &B)); 1720eb3ef3b2SHong Zhang 17219566063dSJacob Faibussowitsch PetscCall(MatMatSolve_MUMPS(A, B, X)); 17229566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 17233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1724eb3ef3b2SHong Zhang } 1725eb3ef3b2SHong Zhang 1726ace3df97SHong Zhang #if !defined(PETSC_USE_COMPLEX) 1727a58c3f20SHong Zhang /* 1728a58c3f20SHong Zhang input: 1729a58c3f20SHong Zhang F: numeric factor 1730a58c3f20SHong Zhang output: 1731a58c3f20SHong Zhang nneg: total number of negative pivots 173219d49a3bSHong Zhang nzero: total number of zero pivots 173319d49a3bSHong Zhang npos: (global dimension of F) - nneg - nzero 1734a58c3f20SHong Zhang */ 1735d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1736d71ae5a4SJacob Faibussowitsch { 1737e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1738c1490034SHong Zhang PetscMPIInt size; 1739a58c3f20SHong Zhang 1740a58c3f20SHong Zhang PetscFunctionBegin; 17419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1742bcb30aebSHong Zhang /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1743aed4548fSBarry Smith PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13)); /* the value printed is the ICNTL(13) setting that this check tests */ 1744ed85ac9fSHong Zhang 1745710ac8efSHong Zhang if (nneg) *nneg = mumps->id.INFOG(12); 1746ed85ac9fSHong Zhang if (nzero || npos) { 174708401ef6SPierre Jolivet PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1748710ac8efSHong Zhang if (nzero) *nzero = mumps->id.INFOG(28); 1749710ac8efSHong Zhang if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1750a58c3f20SHong Zhang } 17513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1752a58c3f20SHong Zhang } 175319d49a3bSHong Zhang #endif 1754a58c3f20SHong Zhang 1755d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1756d71ae5a4SJacob Faibussowitsch { 1757a6053eceSJunchao Zhang PetscInt i, nreqs; 1758a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; 1759a6053eceSJunchao Zhang PetscMPIInt count; 1760a6053eceSJunchao Zhang PetscInt64 totnnz, remain; 1761a6053eceSJunchao Zhang const PetscInt osize = mumps->omp_comm_size; 1762a6053eceSJunchao Zhang PetscScalar *val; 17633ab56b82SJunchao Zhang 17643ab56b82SJunchao Zhang PetscFunctionBegin; 1765a6053eceSJunchao Zhang if (osize > 1) { 17663ab56b82SJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) { 17673ab56b82SJunchao Zhang /* master first gathers counts of nonzeros to receive */ 17689566063dSJacob Faibussowitsch if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 17699566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 17703ab56b82SJunchao Zhang 1771a6053eceSJunchao Zhang /* Then each computes number of send/recvs */ 17723ab56b82SJunchao Zhang if (mumps->is_omp_master) { 1773a6053eceSJunchao Zhang /* Start from 1 since self communication is not done in MPI */ 
1774a6053eceSJunchao Zhang nreqs = 0; 1775a6053eceSJunchao Zhang for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1776a6053eceSJunchao Zhang } else { 1777a6053eceSJunchao Zhang nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 17783ab56b82SJunchao Zhang } 177935cb6cd3SPierre Jolivet PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 17803ab56b82SJunchao Zhang 1781a6053eceSJunchao Zhang /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1782a6053eceSJunchao Zhang MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1783a6053eceSJunchao Zhang might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1784a6053eceSJunchao Zhang is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1785a6053eceSJunchao Zhang */ 1786a6053eceSJunchao Zhang nreqs = 0; /* counter for actual send/recvs */ 17873ab56b82SJunchao Zhang if (mumps->is_omp_master) { 1788a6053eceSJunchao Zhang for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 17899566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 17909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(totnnz, &val)); 1791a6053eceSJunchao Zhang 1792a6053eceSJunchao Zhang /* Self communication */ 17939566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 17949566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 17959566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1796a6053eceSJunchao Zhang 1797a6053eceSJunchao Zhang /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 17989566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->irn, 
mumps->jcn)); 17999566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->val_alloc)); 1800a6053eceSJunchao Zhang mumps->nnz = totnnz; 18013ab56b82SJunchao Zhang mumps->irn = irn; 18023ab56b82SJunchao Zhang mumps->jcn = jcn; 1803a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 1804a6053eceSJunchao Zhang 1805a6053eceSJunchao Zhang irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1806a6053eceSJunchao Zhang jcn += mumps->recvcount[0]; 1807a6053eceSJunchao Zhang val += mumps->recvcount[0]; 1808a6053eceSJunchao Zhang 1809a6053eceSJunchao Zhang /* Remote communication */ 1810a6053eceSJunchao Zhang for (i = 1; i < osize; i++) { 1811a6053eceSJunchao Zhang count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1812a6053eceSJunchao Zhang remain = mumps->recvcount[i] - count; 1813a6053eceSJunchao Zhang while (count > 0) { 18149566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 18159566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 18169566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1817a6053eceSJunchao Zhang irn += count; 1818a6053eceSJunchao Zhang jcn += count; 1819a6053eceSJunchao Zhang val += count; 1820a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1821a6053eceSJunchao Zhang remain -= count; 1822a6053eceSJunchao Zhang } 18233ab56b82SJunchao Zhang } 18243ab56b82SJunchao Zhang } else { 1825a6053eceSJunchao Zhang irn = mumps->irn; 1826a6053eceSJunchao Zhang jcn = mumps->jcn; 1827a6053eceSJunchao Zhang val = mumps->val; 1828a6053eceSJunchao Zhang count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1829a6053eceSJunchao Zhang remain = mumps->nnz - count; 1830a6053eceSJunchao Zhang while (count > 0) { 18319566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(irn, 
count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 18329566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 18339566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1834a6053eceSJunchao Zhang irn += count; 1835a6053eceSJunchao Zhang jcn += count; 1836a6053eceSJunchao Zhang val += count; 1837a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1838a6053eceSJunchao Zhang remain -= count; 18393ab56b82SJunchao Zhang } 18403ab56b82SJunchao Zhang } 1841a6053eceSJunchao Zhang } else { 1842a6053eceSJunchao Zhang nreqs = 0; 1843a6053eceSJunchao Zhang if (mumps->is_omp_master) { 1844a6053eceSJunchao Zhang val = mumps->val + mumps->recvcount[0]; 1845a6053eceSJunchao Zhang for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1846a6053eceSJunchao Zhang count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1847a6053eceSJunchao Zhang remain = mumps->recvcount[i] - count; 1848a6053eceSJunchao Zhang while (count > 0) { 18499566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1850a6053eceSJunchao Zhang val += count; 1851a6053eceSJunchao Zhang count = PetscMin(remain, PETSC_MPI_INT_MAX); 1852a6053eceSJunchao Zhang remain -= count; 1853a6053eceSJunchao Zhang } 1854a6053eceSJunchao Zhang } 1855a6053eceSJunchao Zhang } else { 1856a6053eceSJunchao Zhang val = mumps->val; 1857a6053eceSJunchao Zhang count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1858a6053eceSJunchao Zhang remain = mumps->nnz - count; 1859a6053eceSJunchao Zhang while (count > 0) { 18609566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1861a6053eceSJunchao Zhang val += count; 1862a6053eceSJunchao Zhang 
count = PetscMin(remain, PETSC_MPI_INT_MAX); 1863a6053eceSJunchao Zhang remain -= count; 1864a6053eceSJunchao Zhang } 1865a6053eceSJunchao Zhang } 1866a6053eceSJunchao Zhang } 18679566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1868a6053eceSJunchao Zhang mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1869a6053eceSJunchao Zhang } 18703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18713ab56b82SJunchao Zhang } 18723ab56b82SJunchao Zhang 1873d71ae5a4SJacob Faibussowitsch /* MatFactorNumeric_MUMPS - numeric factorization: returns early when a negative INFOG(1) is already recorded, otherwise refreshes the matrix triples, gathers them on the OpenMP master and runs MUMPS with job = JOB_FACTNUMERIC. */ PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info) 1874d71ae5a4SJacob Faibussowitsch { 1875e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1876ace3abfcSBarry Smith PetscBool isMPIAIJ; 1877397b6df1SKris Buschelman 1878397b6df1SKris Buschelman PetscFunctionBegin; 1879dbf6bb8dSprj- /* NOTE(review): the clause (INFOG(1) == -16 && INFOG(1) == 0) can never be true, so its negation is always true and this guard reduces to INFOG(1) < 0. The second comparison was presumably meant to read a different entry (the MUMPS Users' Guide documents INFO(2) = N for error -16) - confirm the intent before changing it, since that would alter which cases take this early return. */ if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 188048a46eb9SPierre Jolivet if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 18819566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 18823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18832aca8efcSHong Zhang } 18846baea169SHong Zhang 18859566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 18869566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1887397b6df1SKris Buschelman 1888397b6df1SKris Buschelman /* numerical factorization phase */ 1889a5e57a09SHong Zhang mumps->id.job = JOB_FACTNUMERIC; 18904e34a73bSHong Zhang if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1891ad540459SPierre Jolivet if (!mumps->myid) mumps->id.a = 
(MumpsScalar *)mumps->val; 1892397b6df1SKris Buschelman } else { 1893940cd9d6SSatish Balay mumps->id.a_loc = (MumpsScalar *)mumps->val; 1894397b6df1SKris Buschelman } 18953ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 1896a5e57a09SHong Zhang if (mumps->id.INFOG(1) < 0) { 18977a46b595SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 1898c0d63f2fSHong Zhang if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 18999566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1900603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1901c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -13) { 19029566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1903603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1904c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 19059566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1906603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 19072aca8efcSHong Zhang } else { 19089566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1909603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 1910151787a6SHong Zhang } 19112aca8efcSHong Zhang } 1912aed4548fSBarry Smith PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " 
mumps->id.ICNTL(16):=%d", mumps->id.ICNTL(16)); /* the value printed is the ICNTL(16) setting that this check tests */ 1913397b6df1SKris Buschelman 1914b3cb21ddSStefano Zampini F->assembled = PETSC_TRUE; 1915d47f36abSHong Zhang 1916b3cb21ddSStefano Zampini if (F->schur) { /* reset Schur status to unfactored */ 19173cb7dd0eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 1918c70f7ee4SJunchao Zhang F->schur->offloadmask = PETSC_OFFLOAD_CPU; 19193cb7dd0eSStefano Zampini #endif 1920b3cb21ddSStefano Zampini if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1921b3cb21ddSStefano Zampini mumps->id.ICNTL(19) = 2; 19229566063dSJacob Faibussowitsch PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1923b3cb21ddSStefano Zampini } 19249566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1925b3cb21ddSStefano Zampini } 192667877ebaSShri Abhyankar 1927066565c5SStefano Zampini /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1928066565c5SStefano Zampini if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1929066565c5SStefano Zampini 19303ab56b82SJunchao Zhang if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 19312d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 193267877ebaSShri Abhyankar PetscInt lsol_loc; 193367877ebaSShri Abhyankar PetscScalar *sol_loc; 19342205254eSKarl Rupp 19359566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 1936c2093ab7SHong Zhang 1937c2093ab7SHong Zhang /* distributed solution; Create x_seq=sol_loc for repeated use */ 1938c2093ab7SHong Zhang if (mumps->x_seq) { 19399566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 19409566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 19419566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 1942c2093ab7SHong Zhang } 1943a5e57a09SHong Zhang lsol_loc = mumps->id.INFO(23); /* length of 
sol_loc */ 19449566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1945a5e57a09SHong Zhang mumps->id.lsol_loc = lsol_loc; 1946940cd9d6SSatish Balay mumps->id.sol_loc = (MumpsScalar *)sol_loc; 19479566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 194867877ebaSShri Abhyankar } 19499566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 19503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1951397b6df1SKris Buschelman } 1952397b6df1SKris Buschelman 19539a2535b5SHong Zhang /* Sets MUMPS options from the options database */ 1954d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1955d71ae5a4SJacob Faibussowitsch { 1956e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1957413bcc21SPierre Jolivet PetscMUMPSInt icntl = 0, size, *listvar_schur; 195845e3843bSPierre Jolivet PetscInt info[80], i, ninfo = 80, rbs, cbs; 1959413bcc21SPierre Jolivet PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1960413bcc21SPierre Jolivet MumpsScalar *arr; 1961dcd589f8SShri Abhyankar 1962dcd589f8SShri Abhyankar PetscFunctionBegin; 196326cc229bSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1964413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1965413bcc21SPierre Jolivet PetscInt nthreads = 0; 1966413bcc21SPierre Jolivet PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1967413bcc21SPierre Jolivet PetscInt nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 1968413bcc21SPierre Jolivet 1969413bcc21SPierre Jolivet mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1970413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1971413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1972413bcc21SPierre Jolivet 1973413bcc21SPierre Jolivet PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1974413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1975413bcc21SPierre Jolivet /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1976413bcc21SPierre Jolivet PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1977413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) { 19789371c9d4SSatish Balay PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 19799371c9d4SSatish Balay ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1980413bcc21SPierre Jolivet PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 1981413bcc21SPierre Jolivet #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1982413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1983413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1984413bcc21SPierre Jolivet #endif 1985413bcc21SPierre Jolivet } else { 1986413bcc21SPierre Jolivet mumps->omp_comm = PETSC_COMM_SELF; 1987413bcc21SPierre Jolivet mumps->mumps_comm = mumps->petsc_comm; 1988413bcc21SPierre Jolivet mumps->is_omp_master = PETSC_TRUE; 1989413bcc21SPierre Jolivet } 1990413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 1991413bcc21SPierre Jolivet mumps->reqs = NULL; 1992413bcc21SPierre Jolivet mumps->tag = 0; 1993413bcc21SPierre Jolivet 1994413bcc21SPierre Jolivet if (mumps->mumps_comm != MPI_COMM_NULL) { 1995413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1996413bcc21SPierre Jolivet /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 1997413bcc21SPierre Jolivet MPI_Comm comm; 1998413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 1999413bcc21SPierre Jolivet mumps->mumps_comm = comm; 2000413bcc21SPierre Jolivet } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2001413bcc21SPierre Jolivet } 2002413bcc21SPierre Jolivet 2003413bcc21SPierre Jolivet mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2004413bcc21SPierre Jolivet mumps->id.job = JOB_INIT; 2005413bcc21SPierre Jolivet mumps->id.par = 1; /* host participates factorizaton and solve */ 2006413bcc21SPierre Jolivet mumps->id.sym = mumps->sym; 2007413bcc21SPierre Jolivet 2008413bcc21SPierre Jolivet size = mumps->id.size_schur; 2009413bcc21SPierre Jolivet arr = mumps->id.schur; 2010413bcc21SPierre Jolivet listvar_schur = mumps->id.listvar_schur; 2011413bcc21SPierre Jolivet PetscMUMPS_c(mumps); 2012413bcc21SPierre Jolivet PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1)); 2013413bcc21SPierre Jolivet /* restore cached ICNTL and CNTL values */ 2014413bcc21SPierre Jolivet for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2015413bcc21SPierre Jolivet for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2016413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 2017413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 2018413bcc21SPierre Jolivet 2019413bcc21SPierre Jolivet if (schur) { 2020413bcc21SPierre Jolivet mumps->id.size_schur = size; 2021413bcc21SPierre Jolivet mumps->id.schur_lld = size; 2022413bcc21SPierre Jolivet mumps->id.schur = arr; 2023413bcc21SPierre Jolivet mumps->id.listvar_schur = listvar_schur; 2024413bcc21SPierre Jolivet if (mumps->petsc_size > 1) { 2025413bcc21SPierre Jolivet PetscBool gs; /* gs is false if any rank 
other than root has non-empty IS */ 2026413bcc21SPierre Jolivet 2027413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2028413bcc21SPierre Jolivet gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2029712fec58SPierre Jolivet PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2030413bcc21SPierre Jolivet PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2031413bcc21SPierre Jolivet } else { 2032413bcc21SPierre Jolivet if (F->factortype == MAT_FACTOR_LU) { 2033413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2034413bcc21SPierre Jolivet } else { 2035413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2036413bcc21SPierre Jolivet } 2037413bcc21SPierre Jolivet } 2038413bcc21SPierre Jolivet mumps->id.ICNTL(26) = -1; 2039413bcc21SPierre Jolivet } 2040413bcc21SPierre Jolivet 2041413bcc21SPierre Jolivet /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2042413bcc21SPierre Jolivet For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
2043413bcc21SPierre Jolivet */ 2044413bcc21SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2045413bcc21SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2046413bcc21SPierre Jolivet 2047413bcc21SPierre Jolivet mumps->scat_rhs = NULL; 2048413bcc21SPierre Jolivet mumps->scat_sol = NULL; 2049413bcc21SPierre Jolivet 2050413bcc21SPierre Jolivet /* set PETSc-MUMPS default options - override MUMPS default */ 2051413bcc21SPierre Jolivet mumps->id.ICNTL(3) = 0; 2052413bcc21SPierre Jolivet mumps->id.ICNTL(4) = 0; 2053413bcc21SPierre Jolivet if (mumps->petsc_size == 1) { 2054413bcc21SPierre Jolivet mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2055413bcc21SPierre Jolivet mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2056413bcc21SPierre Jolivet } else { 2057413bcc21SPierre Jolivet mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2058413bcc21SPierre Jolivet mumps->id.ICNTL(21) = 1; /* distributed solution */ 2059413bcc21SPierre Jolivet } 2060413bcc21SPierre Jolivet } 20619566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 20629a2535b5SHong Zhang if (flg) mumps->id.ICNTL(1) = icntl; 20639566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 20649a2535b5SHong Zhang if (flg) mumps->id.ICNTL(2) = icntl; 20659566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 20669a2535b5SHong Zhang if (flg) mumps->id.ICNTL(3) = icntl; 2067dcd589f8SShri Abhyankar 20689566063dSJacob Faibussowitsch 
PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 20699a2535b5SHong Zhang if (flg) mumps->id.ICNTL(4) = icntl; 20709a2535b5SHong Zhang if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 20719a2535b5SHong Zhang 20729566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 20739a2535b5SHong Zhang if (flg) mumps->id.ICNTL(6) = icntl; 20749a2535b5SHong Zhang 20759566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2076dcd589f8SShri Abhyankar if (flg) { 2077aed4548fSBarry Smith PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2078b53c1a7fSBarry Smith mumps->id.ICNTL(7) = icntl; 2079dcd589f8SShri Abhyankar } 2080e0b74bf9SHong Zhang 20819566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 20829566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 20839566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 20849566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an 
error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 20859566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 20869566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 20879566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 208845e3843bSPierre Jolivet PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 208945e3843bSPierre Jolivet if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 209045e3843bSPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 209145e3843bSPierre Jolivet if (flg) { 209245e3843bSPierre Jolivet PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 209345e3843bSPierre Jolivet PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 209445e3843bSPierre Jolivet } 20959566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 209659ac8732SStefano Zampini if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 20979566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F->schur)); 20989566063dSJacob Faibussowitsch 
PetscCall(MatMumpsResetSchur_Private(mumps)); 209959ac8732SStefano Zampini } 210025aac85cSJunchao Zhang 210143f3b051SJunchao Zhang /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 210243f3b051SJunchao Zhang and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 210325aac85cSJunchao Zhang and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 210443f3b051SJunchao Zhang This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 210543f3b051SJunchao Zhang see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 210643f3b051SJunchao Zhang In short, we could not use distributed RHS with MPICH until v4.0b1. 210725aac85cSJunchao Zhang */ 210843f3b051SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 210925aac85cSJunchao Zhang mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 211043f3b051SJunchao Zhang #else 211143f3b051SJunchao Zhang mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 211225aac85cSJunchao Zhang #endif 21139566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2114aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10", (int)mumps->ICNTL20); 211525aac85cSJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2116aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 211725aac85cSJunchao Zhang #endif 21189566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 21199a2535b5SHong Zhang 21209566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 21219566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 21229566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 21239371c9d4SSatish Balay if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2124d7ebd59bSHong Zhang 21259566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 21269566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 21279566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple 
right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 21289566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 21299566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 21309566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 21319566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 21329566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 21339566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 21349566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 21359566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 21369566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated 
compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2137dcd589f8SShri Abhyankar 21389566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 21399566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 21409566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 21419566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 21429566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 21439566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2144e5bb22a1SHong Zhang 21459566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2146b34f08ffSHong Zhang 21479566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2148b34f08ffSHong Zhang if (ninfo) { 214908401ef6SPierre Jolivet PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 21509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2151b34f08ffSHong Zhang mumps->ninfo = ninfo; 2152b34f08ffSHong Zhang for (i = 0; i < ninfo; i++) { 2153aed4548fSBarry Smith 
PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2154f7d195e4SLawrence Mitchell mumps->info[i] = info[i]; 2155b34f08ffSHong Zhang } 2156b34f08ffSHong Zhang } 2157d0609cedSBarry Smith PetscOptionsEnd(); 21583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2159dcd589f8SShri Abhyankar } 2160dcd589f8SShri Abhyankar 2161d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2162d71ae5a4SJacob Faibussowitsch { 21635cd7cf9dSHong Zhang PetscFunctionBegin; 21645cd7cf9dSHong Zhang if (mumps->id.INFOG(1) < 0) { 21657a46b595SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1)); 21665cd7cf9dSHong Zhang if (mumps->id.INFOG(1) == -6) { 21679566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2168603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 21695cd7cf9dSHong Zhang } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 21709566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2171603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2172dbf6bb8dSprj- } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 21739566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "Empty matrix\n")); 21745cd7cf9dSHong Zhang } else { 21759566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2176603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 21775cd7cf9dSHong Zhang } 21785cd7cf9dSHong Zhang } 21793ba16761SJacob Faibussowitsch 
PetscFunctionReturn(PETSC_SUCCESS); 21805cd7cf9dSHong Zhang } 21815cd7cf9dSHong Zhang 2182d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2183d71ae5a4SJacob Faibussowitsch { 2184e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 218567877ebaSShri Abhyankar Vec b; 218667877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2187397b6df1SKris Buschelman 2188397b6df1SKris Buschelman PetscFunctionBegin; 2189d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2190d47f36abSHong Zhang /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 21913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2192d47f36abSHong Zhang } 2193dcd589f8SShri Abhyankar 21949a2535b5SHong Zhang /* Set MUMPS options from the options database */ 219526cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2196dcd589f8SShri Abhyankar 21979566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 21989566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2199dcd589f8SShri Abhyankar 220067877ebaSShri Abhyankar /* analysis phase */ 2201a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2202a5e57a09SHong Zhang mumps->id.n = M; 2203a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 220467877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2205a5e57a09SHong Zhang if (!mumps->myid) { 2206a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2207a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2208a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2209a6053eceSJunchao Zhang if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 22104ac6704cSBarry Smith if (r) { 22114ac6704cSBarry Smith mumps->id.ICNTL(7) = 1; 2212a5e57a09SHong Zhang if (!mumps->myid) { 2213e0b74bf9SHong Zhang const PetscInt *idx; 2214a6053eceSJunchao Zhang PetscInt i; 
22152205254eSKarl Rupp 22169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 22179566063dSJacob Faibussowitsch PetscCall(ISGetIndices(r, &idx)); 22189566063dSJacob Faibussowitsch for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 22199566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(r, &idx)); 2220e0b74bf9SHong Zhang } 2221e0b74bf9SHong Zhang } 222267877ebaSShri Abhyankar } 222367877ebaSShri Abhyankar break; 222467877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2225a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2226a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2227a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2228a6053eceSJunchao Zhang if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 222925aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 22309566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 22319566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 22329566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 223325aac85cSJunchao Zhang } 223467877ebaSShri Abhyankar break; 223567877ebaSShri Abhyankar } 22363ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 22379566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 223867877ebaSShri Abhyankar 2239719d5645SBarry Smith F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2240dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 224151d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 22424e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 2243eb3ef3b2SHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2244b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2245d47f36abSHong Zhang 
2246d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 22473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2248b24902e0SBarry Smith } 2249b24902e0SBarry Smith 2250450b117fSShri Abhyankar /* Note the Petsc r and c permutations are ignored */ 2251d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2252d71ae5a4SJacob Faibussowitsch { 2253e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 225467877ebaSShri Abhyankar Vec b; 225567877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2256450b117fSShri Abhyankar 2257450b117fSShri Abhyankar PetscFunctionBegin; 2258d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2259d47f36abSHong Zhang /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 22603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2261d47f36abSHong Zhang } 2262dcd589f8SShri Abhyankar 22639a2535b5SHong Zhang /* Set MUMPS options from the options database */ 226426cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2265dcd589f8SShri Abhyankar 22669566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 22679566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 226867877ebaSShri Abhyankar 226967877ebaSShri Abhyankar /* analysis phase */ 2270a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2271a5e57a09SHong Zhang mumps->id.n = M; 2272a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 227367877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2274a5e57a09SHong Zhang if (!mumps->myid) { 2275a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2276a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2277a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2278ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 227967877ebaSShri Abhyankar } 
228067877ebaSShri Abhyankar break; 228167877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2282a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2283a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2284a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2285ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 228625aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 22879566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 22889566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 22899566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 229025aac85cSJunchao Zhang } 229167877ebaSShri Abhyankar break; 229267877ebaSShri Abhyankar } 22933ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 22949566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 229567877ebaSShri Abhyankar 2296450b117fSShri Abhyankar F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2297dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 229851d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2299b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2300d47f36abSHong Zhang 2301d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 23023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2303450b117fSShri Abhyankar } 2304b24902e0SBarry Smith 2305141f4205SHong Zhang /* Note the Petsc r permutation and factor info are ignored */ 2306d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2307d71ae5a4SJacob Faibussowitsch { 2308e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 230967877ebaSShri Abhyankar Vec b; 231067877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2311397b6df1SKris Buschelman 
2312397b6df1SKris Buschelman PetscFunctionBegin; 2313d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2314d47f36abSHong Zhang /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 23153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2316d47f36abSHong Zhang } 2317dcd589f8SShri Abhyankar 23189a2535b5SHong Zhang /* Set MUMPS options from the options database */ 231926cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2320dcd589f8SShri Abhyankar 23219566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 23229566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2323dcd589f8SShri Abhyankar 232467877ebaSShri Abhyankar /* analysis phase */ 2325a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 2326a5e57a09SHong Zhang mumps->id.n = M; 2327a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 232867877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2329a5e57a09SHong Zhang if (!mumps->myid) { 2330a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2331a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2332a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2333ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 233467877ebaSShri Abhyankar } 233567877ebaSShri Abhyankar break; 233667877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2337a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2338a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2339a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2340ad540459SPierre Jolivet if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 234125aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 23429566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 23439566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 23449566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 234525aac85cSJunchao Zhang } 234667877ebaSShri Abhyankar break; 234767877ebaSShri Abhyankar } 23483ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 23499566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 23505cd7cf9dSHong Zhang 23512792810eSHong Zhang F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2352dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 235351d5961aSHong Zhang F->ops->solvetranspose = MatSolve_MUMPS; 23544e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 235523a5080aSHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2356b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 23574e34a73bSHong Zhang #if defined(PETSC_USE_COMPLEX) 23580298fd71SBarry Smith F->ops->getinertia = NULL; 23594e34a73bSHong Zhang #else 23604e34a73bSHong Zhang F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2361db4efbfdSBarry Smith #endif 2362d47f36abSHong Zhang 2363d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 23643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2365b24902e0SBarry Smith } 2366b24902e0SBarry Smith 2367d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2368d71ae5a4SJacob Faibussowitsch { 236964e6c443SBarry Smith PetscBool iascii; 237064e6c443SBarry Smith PetscViewerFormat format; 2371e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2372f6c57405SHong Zhang 2373f6c57405SHong Zhang PetscFunctionBegin; 237464e6c443SBarry Smith /* check if matrix is mumps type */ 23753ba16761SJacob Faibussowitsch if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 237664e6c443SBarry Smith 
23779566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 237864e6c443SBarry Smith if (iascii) { 23799566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 23801511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 23819566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 23821511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 23839566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 23849566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 23859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 23869566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 23879566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 23889566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 23899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 23909566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 23919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 23929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 23939566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): 
%d\n", mumps->id.ICNTL(10))); 23949566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2395a5e57a09SHong Zhang if (mumps->id.ICNTL(11) > 0) { 23969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 23979566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 23989566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 23999566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 24009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 24019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2402f6c57405SHong Zhang } 24039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 24049566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 24059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 240645e3843bSPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2407f6c57405SHong Zhang /* ICNTL(15-17) not used */ 24089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 24099566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 24109566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 24119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 24129566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 24139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2414c0165424SHong Zhang 24159566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 24169566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 24179566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 24189566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 24199566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 24209566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 242142179a6aSHong Zhang 24229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 24239566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 24249566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 24259566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 24269566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 24279566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2428f6c57405SHong Zhang 24299566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 24309566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 24319566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 24329566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 24339566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 24349566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2435f6c57405SHong Zhang 2436a5b23f4aSJose E. 
Roman /* information local to each processor */ 24379566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 24389566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 24399566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 24409566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 24419566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 24429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 24439566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 24449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 24459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 24469566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2447f6c57405SHong Zhang 24489566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 24499566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 24509566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2451f6c57405SHong Zhang 24529566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 24539566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 24549566063dSJacob Faibussowitsch 
PetscCall(PetscViewerFlush(viewer)); 2455f6c57405SHong Zhang 24569566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 24579566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 24589566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2459b34f08ffSHong Zhang 2460a0e18203SThibaut Appel if (mumps->ninfo && mumps->ninfo <= 80) { 2461b34f08ffSHong Zhang PetscInt i; 2462b34f08ffSHong Zhang for (i = 0; i < mumps->ninfo; i++) { 24639566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 24649566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 24659566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 2466b34f08ffSHong Zhang } 2467b34f08ffSHong Zhang } 24689566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 24691511cd71SPierre Jolivet } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2470f6c57405SHong Zhang 24711511cd71SPierre Jolivet if (mumps->myid == 0) { /* information from the host */ 24729566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 24739566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 24749566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 24759566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2476f6c57405SHong Zhang 24779566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 24789566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 24799566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 24809566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 24819566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 24829566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 24839566063dSJacob 
Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 24849566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 24859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 24869566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 24879566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 24889566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 24899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 24909566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 24919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 24929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 24939566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS 
internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 24949566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 24959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 24969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 24979566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 24989566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 24999566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 25009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 25019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 25029566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 25039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 
25049566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 25059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 25069566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 25079566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 25089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 25099566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 25109566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2511f6c57405SHong Zhang } 2512f6c57405SHong Zhang } 2513cb828f0fSHong Zhang } 25143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2515f6c57405SHong Zhang } 2516f6c57405SHong Zhang 2517d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2518d71ae5a4SJacob Faibussowitsch { 2519e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 252035bd34faSBarry Smith 252135bd34faSBarry Smith PetscFunctionBegin; 
252235bd34faSBarry Smith info->block_size = 1.0; 2523cb828f0fSHong Zhang info->nz_allocated = mumps->id.INFOG(20); 2524cb828f0fSHong Zhang info->nz_used = mumps->id.INFOG(20); 252535bd34faSBarry Smith info->nz_unneeded = 0.0; 252635bd34faSBarry Smith info->assemblies = 0.0; 252735bd34faSBarry Smith info->mallocs = 0.0; 252835bd34faSBarry Smith info->memory = 0.0; 252935bd34faSBarry Smith info->fill_ratio_given = 0; 253035bd34faSBarry Smith info->fill_ratio_needed = 0; 253135bd34faSBarry Smith info->factor_mallocs = 0; 25323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 253335bd34faSBarry Smith } 253435bd34faSBarry Smith 2535d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2536d71ae5a4SJacob Faibussowitsch { 2537e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2538a3d589ffSStefano Zampini const PetscScalar *arr; 25398e7ba810SStefano Zampini const PetscInt *idxs; 25408e7ba810SStefano Zampini PetscInt size, i; 25416444a565SStefano Zampini 25426444a565SStefano Zampini PetscFunctionBegin; 25439566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is, &size)); 2544b3cb21ddSStefano Zampini /* Schur complement matrix */ 25459566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F->schur)); 25469566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 25479566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2548a3d589ffSStefano Zampini mumps->id.schur = (MumpsScalar *)arr; 2549a3d589ffSStefano Zampini mumps->id.size_schur = size; 2550a3d589ffSStefano Zampini mumps->id.schur_lld = size; 25519566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 255248a46eb9SPierre Jolivet if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2553b3cb21ddSStefano Zampini 2554b3cb21ddSStefano Zampini /* MUMPS expects Fortran style indices */ 25559566063dSJacob Faibussowitsch 
PetscCall(PetscFree(mumps->id.listvar_schur)); 25569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 25579566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is, &idxs)); 25589566063dSJacob Faibussowitsch for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 25599566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is, &idxs)); 256059ac8732SStefano Zampini /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2561b5fa320bSStefano Zampini mumps->id.ICNTL(26) = -1; 25623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25636444a565SStefano Zampini } 256459ac8732SStefano Zampini 2565d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2566d71ae5a4SJacob Faibussowitsch { 25676444a565SStefano Zampini Mat St; 2568e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 25696444a565SStefano Zampini PetscScalar *array; 25706444a565SStefano Zampini #if defined(PETSC_USE_COMPLEX) 25718ac429a0SStefano Zampini PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 25726444a565SStefano Zampini #endif 25736444a565SStefano Zampini 25746444a565SStefano Zampini PetscFunctionBegin; 257508401ef6SPierre Jolivet PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! 
You should call MatFactorSetSchurIS to enable it"); 25769566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 25779566063dSJacob Faibussowitsch PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 25789566063dSJacob Faibussowitsch PetscCall(MatSetType(St, MATDENSE)); 25799566063dSJacob Faibussowitsch PetscCall(MatSetUp(St)); 25809566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(St, &array)); 258159ac8732SStefano Zampini if (!mumps->sym) { /* MUMPS always return a full matrix */ 25826444a565SStefano Zampini if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 25836444a565SStefano Zampini PetscInt i, j, N = mumps->id.size_schur; 25846444a565SStefano Zampini for (i = 0; i < N; i++) { 25856444a565SStefano Zampini for (j = 0; j < N; j++) { 25866444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 25876444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j]; 25886444a565SStefano Zampini #else 25896444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 25906444a565SStefano Zampini #endif 25916444a565SStefano Zampini array[j * N + i] = val; 25926444a565SStefano Zampini } 25936444a565SStefano Zampini } 25946444a565SStefano Zampini } else { /* stored by columns */ 25959566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 25966444a565SStefano Zampini } 25976444a565SStefano Zampini } else { /* either full or lower-triangular (not packed) */ 25986444a565SStefano Zampini if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 25996444a565SStefano Zampini PetscInt i, j, N = mumps->id.size_schur; 26006444a565SStefano Zampini for (i = 0; i < N; i++) { 26016444a565SStefano Zampini for (j = i; j < N; j++) { 26026444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 26036444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j]; 
26046444a565SStefano Zampini #else 26056444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 26066444a565SStefano Zampini #endif 26076444a565SStefano Zampini array[i * N + j] = val; 26086444a565SStefano Zampini array[j * N + i] = val; 26096444a565SStefano Zampini } 26106444a565SStefano Zampini } 26116444a565SStefano Zampini } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 26129566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 26136444a565SStefano Zampini } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 26146444a565SStefano Zampini PetscInt i, j, N = mumps->id.size_schur; 26156444a565SStefano Zampini for (i = 0; i < N; i++) { 26166444a565SStefano Zampini for (j = 0; j < i + 1; j++) { 26176444a565SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 26186444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j]; 26196444a565SStefano Zampini #else 26206444a565SStefano Zampini PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 26216444a565SStefano Zampini #endif 26226444a565SStefano Zampini array[i * N + j] = val; 26236444a565SStefano Zampini array[j * N + i] = val; 26246444a565SStefano Zampini } 26256444a565SStefano Zampini } 26266444a565SStefano Zampini } 26276444a565SStefano Zampini } 26289566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(St, &array)); 26296444a565SStefano Zampini *S = St; 26303ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 26316444a565SStefano Zampini } 26326444a565SStefano Zampini 2633d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2634d71ae5a4SJacob Faibussowitsch { 2635e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 26365ccb76cbSHong Zhang 26375ccb76cbSHong Zhang PetscFunctionBegin; 2638413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { /* need to cache 
icntl and ival since PetscMUMPS_c() has never been called */ 2639413bcc21SPierre Jolivet PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 26409371c9d4SSatish Balay for (i = 0; i < nICNTL_pre; ++i) 26419371c9d4SSatish Balay if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2642413bcc21SPierre Jolivet if (i == nICNTL_pre) { /* not already cached */ 2643413bcc21SPierre Jolivet if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2644413bcc21SPierre Jolivet else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2645413bcc21SPierre Jolivet mumps->ICNTL_pre[0]++; 2646413bcc21SPierre Jolivet } 2647413bcc21SPierre Jolivet mumps->ICNTL_pre[1 + 2 * i] = icntl; 2648413bcc21SPierre Jolivet PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2649413bcc21SPierre Jolivet } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 26503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 26515ccb76cbSHong Zhang } 26525ccb76cbSHong Zhang 2653d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2654d71ae5a4SJacob Faibussowitsch { 2655e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2656bc6112feSHong Zhang 2657bc6112feSHong Zhang PetscFunctionBegin; 265836df9881Sjeremy theler if (mumps->id.job == JOB_NULL) { 265936df9881Sjeremy theler PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 266036df9881Sjeremy theler *ival = 0; 266136df9881Sjeremy theler for (i = 0; i < nICNTL_pre; ++i) { 266236df9881Sjeremy theler if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 266336df9881Sjeremy theler } 266436df9881Sjeremy theler } else *ival = mumps->id.ICNTL(icntl); 26653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2666bc6112feSHong Zhang } 2667bc6112feSHong Zhang 26685ccb76cbSHong Zhang /*@ 26695ccb76cbSHong Zhang MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 26705ccb76cbSHong Zhang 2671c3339decSBarry Smith Logically Collective 26725ccb76cbSHong Zhang 26735ccb76cbSHong Zhang Input Parameters: 267411a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 26755ccb76cbSHong Zhang . icntl - index of MUMPS parameter array ICNTL() 26765ccb76cbSHong Zhang - ival - value of MUMPS ICNTL(icntl) 26775ccb76cbSHong Zhang 26783c7db156SBarry Smith Options Database Key: 2679147403d9SBarry Smith . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 26805ccb76cbSHong Zhang 26815ccb76cbSHong Zhang Level: beginner 26825ccb76cbSHong Zhang 268396a0c994SBarry Smith References: 2684606c0280SSatish Balay . 
* - MUMPS Users' Guide 26855ccb76cbSHong Zhang 26862ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 26875ccb76cbSHong Zhang @*/ 2688d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2689d71ae5a4SJacob Faibussowitsch { 26905ccb76cbSHong Zhang PetscFunctionBegin; 26912989dfd4SHong Zhang PetscValidType(F, 1); 269228b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 26935ccb76cbSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 26945ccb76cbSHong Zhang PetscValidLogicalCollectiveInt(F, ival, 3); 2695413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2696cac4c232SBarry Smith PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 26973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 26985ccb76cbSHong Zhang } 26995ccb76cbSHong Zhang 2700a21f80fcSHong Zhang /*@ 2701a21f80fcSHong Zhang MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2702a21f80fcSHong Zhang 2703c3339decSBarry Smith Logically Collective 2704a21f80fcSHong Zhang 2705a21f80fcSHong Zhang Input Parameters: 270611a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2707a21f80fcSHong Zhang - icntl - index of MUMPS parameter array ICNTL() 2708a21f80fcSHong Zhang 2709a21f80fcSHong Zhang Output Parameter: 2710a21f80fcSHong Zhang . ival - value of MUMPS ICNTL(icntl) 2711a21f80fcSHong Zhang 2712a21f80fcSHong Zhang Level: beginner 2713a21f80fcSHong Zhang 271496a0c994SBarry Smith References: 2715606c0280SSatish Balay . 
* - MUMPS Users' Guide 2716a21f80fcSHong Zhang 27172ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2718a21f80fcSHong Zhang @*/ 2719d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2720d71ae5a4SJacob Faibussowitsch { 2721bc6112feSHong Zhang PetscFunctionBegin; 27222989dfd4SHong Zhang PetscValidType(F, 1); 272328b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2724bc6112feSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 2725bc6112feSHong Zhang PetscValidIntPointer(ival, 3); 2726413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2727cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 27283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2729bc6112feSHong Zhang } 2730bc6112feSHong Zhang 2731d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2732d71ae5a4SJacob Faibussowitsch { 2733e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 27348928b65cSHong Zhang 27358928b65cSHong Zhang PetscFunctionBegin; 2736413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { 2737413bcc21SPierre Jolivet PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 27389371c9d4SSatish Balay for (i = 0; i < nCNTL_pre; ++i) 27399371c9d4SSatish Balay if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2740413bcc21SPierre Jolivet if (i == nCNTL_pre) { 2741413bcc21SPierre Jolivet if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2742413bcc21SPierre Jolivet else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2743413bcc21SPierre Jolivet mumps->CNTL_pre[0]++; 2744413bcc21SPierre Jolivet } 2745413bcc21SPierre Jolivet mumps->CNTL_pre[1 + 2 * i] = icntl; 2746413bcc21SPierre Jolivet mumps->CNTL_pre[2 + 2 * i] = val; 2747413bcc21SPierre Jolivet } else mumps->id.CNTL(icntl) = val; 27483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27498928b65cSHong Zhang } 27508928b65cSHong Zhang 2751d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2752d71ae5a4SJacob Faibussowitsch { 2753e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2754bc6112feSHong Zhang 2755bc6112feSHong Zhang PetscFunctionBegin; 275636df9881Sjeremy theler if (mumps->id.job == JOB_NULL) { 275736df9881Sjeremy theler PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 275836df9881Sjeremy theler *val = 0.0; 275936df9881Sjeremy theler for (i = 0; i < nCNTL_pre; ++i) { 276036df9881Sjeremy theler if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 276136df9881Sjeremy theler } 276236df9881Sjeremy theler } else *val = mumps->id.CNTL(icntl); 27633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2764bc6112feSHong Zhang } 2765bc6112feSHong Zhang 27668928b65cSHong Zhang /*@ 27678928b65cSHong Zhang MatMumpsSetCntl - Set MUMPS parameter CNTL() 27688928b65cSHong Zhang 2769c3339decSBarry Smith Logically Collective 27708928b65cSHong Zhang 27718928b65cSHong Zhang Input Parameters: 277211a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 27738928b65cSHong Zhang . icntl - index of MUMPS parameter array CNTL() 27748928b65cSHong Zhang - val - value of MUMPS CNTL(icntl) 27758928b65cSHong Zhang 27763c7db156SBarry Smith Options Database Key: 2777147403d9SBarry Smith . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 27788928b65cSHong Zhang 27798928b65cSHong Zhang Level: beginner 27808928b65cSHong Zhang 278196a0c994SBarry Smith References: 2782606c0280SSatish Balay . 
* - MUMPS Users' Guide 27838928b65cSHong Zhang 27842ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 27858928b65cSHong Zhang @*/ 2786d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2787d71ae5a4SJacob Faibussowitsch { 27888928b65cSHong Zhang PetscFunctionBegin; 27892989dfd4SHong Zhang PetscValidType(F, 1); 279028b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 27918928b65cSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 2792bc6112feSHong Zhang PetscValidLogicalCollectiveReal(F, val, 3); 2793413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2794cac4c232SBarry Smith PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 27953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27968928b65cSHong Zhang } 27978928b65cSHong Zhang 2798a21f80fcSHong Zhang /*@ 2799a21f80fcSHong Zhang MatMumpsGetCntl - Get MUMPS parameter CNTL() 2800a21f80fcSHong Zhang 2801c3339decSBarry Smith Logically Collective 2802a21f80fcSHong Zhang 2803a21f80fcSHong Zhang Input Parameters: 280411a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2805a21f80fcSHong Zhang - icntl - index of MUMPS parameter array CNTL() 2806a21f80fcSHong Zhang 2807a21f80fcSHong Zhang Output Parameter: 2808a21f80fcSHong Zhang . val - value of MUMPS CNTL(icntl) 2809a21f80fcSHong Zhang 2810a21f80fcSHong Zhang Level: beginner 2811a21f80fcSHong Zhang 281296a0c994SBarry Smith References: 2813606c0280SSatish Balay . 
* - MUMPS Users' Guide 2814a21f80fcSHong Zhang 28152ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2816a21f80fcSHong Zhang @*/ 2817d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2818d71ae5a4SJacob Faibussowitsch { 2819bc6112feSHong Zhang PetscFunctionBegin; 28202989dfd4SHong Zhang PetscValidType(F, 1); 282128b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2822bc6112feSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 2823bc6112feSHong Zhang PetscValidRealPointer(val, 3); 2824413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2825cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 28263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2827bc6112feSHong Zhang } 2828bc6112feSHong Zhang 2829d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2830d71ae5a4SJacob Faibussowitsch { 2831e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2832bc6112feSHong Zhang 2833bc6112feSHong Zhang PetscFunctionBegin; 2834bc6112feSHong Zhang *info = mumps->id.INFO(icntl); 28353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2836bc6112feSHong Zhang } 2837bc6112feSHong Zhang 2838d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2839d71ae5a4SJacob Faibussowitsch { 2840e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2841bc6112feSHong Zhang 2842bc6112feSHong Zhang PetscFunctionBegin; 2843bc6112feSHong Zhang *infog = 
mumps->id.INFOG(icntl); 28443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2845bc6112feSHong Zhang } 2846bc6112feSHong Zhang 2847d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2848d71ae5a4SJacob Faibussowitsch { 2849e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2850bc6112feSHong Zhang 2851bc6112feSHong Zhang PetscFunctionBegin; 2852bc6112feSHong Zhang *rinfo = mumps->id.RINFO(icntl); 28533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2854bc6112feSHong Zhang } 2855bc6112feSHong Zhang 2856d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2857d71ae5a4SJacob Faibussowitsch { 2858e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2859bc6112feSHong Zhang 2860bc6112feSHong Zhang PetscFunctionBegin; 2861bc6112feSHong Zhang *rinfog = mumps->id.RINFOG(icntl); 28623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2863bc6112feSHong Zhang } 2864bc6112feSHong Zhang 28655c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 28665c0bae8cSAshish Patel { 28675c0bae8cSAshish Patel Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 28685c0bae8cSAshish Patel 28695c0bae8cSAshish Patel PetscFunctionBegin; 28705c0bae8cSAshish Patel PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 28715c0bae8cSAshish Patel *size = 0; 28725c0bae8cSAshish Patel *array = NULL; 28735c0bae8cSAshish Patel if (!mumps->myid) { 28745c0bae8cSAshish Patel *size = mumps->id.INFOG(28); 28755c0bae8cSAshish Patel PetscCall(PetscMalloc1(*size, array)); 28765c0bae8cSAshish Patel for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 28775c0bae8cSAshish Patel } 28785c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 28795c0bae8cSAshish Patel } 28805c0bae8cSAshish 
Patel 2881d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2882d71ae5a4SJacob Faibussowitsch { 28830e6b8875SHong Zhang Mat Bt = NULL, Btseq = NULL; 28840e6b8875SHong Zhang PetscBool flg; 2885bb599dfdSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2886bb599dfdSHong Zhang PetscScalar *aa; 2887f410b75aSHong Zhang PetscInt spnr, *ia, *ja, M, nrhs; 2888bb599dfdSHong Zhang 2889bb599dfdSHong Zhang PetscFunctionBegin; 2890064a246eSJacob Faibussowitsch PetscValidPointer(spRHS, 2); 2891013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 28920e6b8875SHong Zhang if (flg) { 28939566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2894013e2dc7SBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2895bb599dfdSHong Zhang 28969566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2897bb599dfdSHong Zhang 28982d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 28990e6b8875SHong Zhang Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 29000e6b8875SHong Zhang Btseq = b->A; 29010e6b8875SHong Zhang } else { 29020e6b8875SHong Zhang Btseq = Bt; 29030e6b8875SHong Zhang } 29040e6b8875SHong Zhang 29059566063dSJacob Faibussowitsch PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2906f410b75aSHong Zhang mumps->id.nrhs = nrhs; 2907f410b75aSHong Zhang mumps->id.lrhs = M; 2908f410b75aSHong Zhang mumps->id.rhs = NULL; 2909f410b75aSHong Zhang 2910e3f2db6aSHong Zhang if (!mumps->myid) { 29119566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 29129566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 291328b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 29149566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, 
ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2915bb599dfdSHong Zhang mumps->id.rhs_sparse = (MumpsScalar *)aa; 2916e3f2db6aSHong Zhang } else { 2917e3f2db6aSHong Zhang mumps->id.irhs_ptr = NULL; 2918e3f2db6aSHong Zhang mumps->id.irhs_sparse = NULL; 2919e3f2db6aSHong Zhang mumps->id.nz_rhs = 0; 2920e3f2db6aSHong Zhang mumps->id.rhs_sparse = NULL; 2921e3f2db6aSHong Zhang } 2922bb599dfdSHong Zhang mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2923e3f2db6aSHong Zhang mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2924bb599dfdSHong Zhang 2925bb599dfdSHong Zhang /* solve phase */ 2926bb599dfdSHong Zhang mumps->id.job = JOB_SOLVE; 29273ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 2928049d1499SBarry Smith PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 292914267174SHong Zhang 2930e3f2db6aSHong Zhang if (!mumps->myid) { 29319566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 29329566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 293328b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2934e3f2db6aSHong Zhang } 29353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2936bb599dfdSHong Zhang } 2937bb599dfdSHong Zhang 2938bb599dfdSHong Zhang /*@ 29392ef1f0ffSBarry Smith MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2940bb599dfdSHong Zhang 2941c3339decSBarry Smith Logically Collective 2942bb599dfdSHong Zhang 294320f4b53cSBarry Smith Input Parameter: 294420f4b53cSBarry Smith . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2945bb599dfdSHong Zhang 2946bb599dfdSHong Zhang Output Parameter: 294720f4b53cSBarry Smith . 
spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2948bb599dfdSHong Zhang 2949bb599dfdSHong Zhang Level: beginner 2950bb599dfdSHong Zhang 2951bb599dfdSHong Zhang References: 2952606c0280SSatish Balay . * - MUMPS Users' Guide 2953bb599dfdSHong Zhang 29542ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2955bb599dfdSHong Zhang @*/ 2956d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2957d71ae5a4SJacob Faibussowitsch { 2958bb599dfdSHong Zhang PetscFunctionBegin; 2959bb599dfdSHong Zhang PetscValidType(F, 1); 296028b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2961cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 29623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2963bb599dfdSHong Zhang } 2964bb599dfdSHong Zhang 2965d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2966d71ae5a4SJacob Faibussowitsch { 29670e6b8875SHong Zhang Mat spRHS; 29680e6b8875SHong Zhang 29690e6b8875SHong Zhang PetscFunctionBegin; 29709566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(spRHST, &spRHS)); 29719566063dSJacob Faibussowitsch PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 29729566063dSJacob Faibussowitsch PetscCall(MatDestroy(&spRHS)); 29733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 29740e6b8875SHong Zhang } 29750e6b8875SHong Zhang 29760e6b8875SHong Zhang /*@ 29772ef1f0ffSBarry Smith MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 29780e6b8875SHong Zhang 2979c3339decSBarry Smith Logically Collective 29800e6b8875SHong Zhang 298120f4b53cSBarry Smith Input Parameter: 298220f4b53cSBarry Smith . 
F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 29830e6b8875SHong Zhang 29840e6b8875SHong Zhang Output Parameter: 298520f4b53cSBarry Smith . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 29860e6b8875SHong Zhang 29870e6b8875SHong Zhang Level: beginner 29880e6b8875SHong Zhang 29890e6b8875SHong Zhang References: 2990606c0280SSatish Balay . * - MUMPS Users' Guide 29910e6b8875SHong Zhang 29922ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 29930e6b8875SHong Zhang @*/ 2994d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 2995d71ae5a4SJacob Faibussowitsch { 29960e6b8875SHong Zhang PetscBool flg; 29970e6b8875SHong Zhang 29980e6b8875SHong Zhang PetscFunctionBegin; 29990e6b8875SHong Zhang PetscValidType(F, 1); 300028b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 30019566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 300228b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 30030e6b8875SHong Zhang 3004cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 30053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30060e6b8875SHong Zhang } 30070e6b8875SHong Zhang 3008a21f80fcSHong Zhang /*@ 3009a21f80fcSHong Zhang MatMumpsGetInfo - Get MUMPS parameter INFO() 3010a21f80fcSHong Zhang 3011c3339decSBarry Smith Logically Collective 3012a21f80fcSHong Zhang 3013a21f80fcSHong Zhang Input Parameters: 301411a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3015a21f80fcSHong Zhang - icntl - index of MUMPS 
parameter array INFO() 3016a21f80fcSHong Zhang 3017a21f80fcSHong Zhang Output Parameter: 3018a21f80fcSHong Zhang . ival - value of MUMPS INFO(icntl) 3019a21f80fcSHong Zhang 3020a21f80fcSHong Zhang Level: beginner 3021a21f80fcSHong Zhang 302296a0c994SBarry Smith References: 3023606c0280SSatish Balay . * - MUMPS Users' Guide 3024a21f80fcSHong Zhang 30252ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3026a21f80fcSHong Zhang @*/ 3027d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3028d71ae5a4SJacob Faibussowitsch { 3029bc6112feSHong Zhang PetscFunctionBegin; 30302989dfd4SHong Zhang PetscValidType(F, 1); 303128b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3032ca810319SHong Zhang PetscValidIntPointer(ival, 3); 3033cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 30343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3035bc6112feSHong Zhang } 3036bc6112feSHong Zhang 3037a21f80fcSHong Zhang /*@ 3038a21f80fcSHong Zhang MatMumpsGetInfog - Get MUMPS parameter INFOG() 3039a21f80fcSHong Zhang 3040c3339decSBarry Smith Logically Collective 3041a21f80fcSHong Zhang 3042a21f80fcSHong Zhang Input Parameters: 304311a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3044a21f80fcSHong Zhang - icntl - index of MUMPS parameter array INFOG() 3045a21f80fcSHong Zhang 3046a21f80fcSHong Zhang Output Parameter: 3047a21f80fcSHong Zhang . ival - value of MUMPS INFOG(icntl) 3048a21f80fcSHong Zhang 3049a21f80fcSHong Zhang Level: beginner 3050a21f80fcSHong Zhang 305196a0c994SBarry Smith References: 3052606c0280SSatish Balay . 
* - MUMPS Users' Guide 3053a21f80fcSHong Zhang 30542ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3055a21f80fcSHong Zhang @*/ 3056d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3057d71ae5a4SJacob Faibussowitsch { 3058bc6112feSHong Zhang PetscFunctionBegin; 30592989dfd4SHong Zhang PetscValidType(F, 1); 306028b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3061ca810319SHong Zhang PetscValidIntPointer(ival, 3); 3062cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 30633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3064bc6112feSHong Zhang } 3065bc6112feSHong Zhang 3066a21f80fcSHong Zhang /*@ 3067a21f80fcSHong Zhang MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3068a21f80fcSHong Zhang 3069c3339decSBarry Smith Logically Collective 3070a21f80fcSHong Zhang 3071a21f80fcSHong Zhang Input Parameters: 307211a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3073a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFO() 3074a21f80fcSHong Zhang 3075a21f80fcSHong Zhang Output Parameter: 3076a21f80fcSHong Zhang . val - value of MUMPS RINFO(icntl) 3077a21f80fcSHong Zhang 3078a21f80fcSHong Zhang Level: beginner 3079a21f80fcSHong Zhang 308096a0c994SBarry Smith References: 3081606c0280SSatish Balay . 
* - MUMPS Users' Guide 3082a21f80fcSHong Zhang 30832ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3084a21f80fcSHong Zhang @*/ 3085d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3086d71ae5a4SJacob Faibussowitsch { 3087bc6112feSHong Zhang PetscFunctionBegin; 30882989dfd4SHong Zhang PetscValidType(F, 1); 308928b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3090bc6112feSHong Zhang PetscValidRealPointer(val, 3); 3091cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 30923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3093bc6112feSHong Zhang } 3094bc6112feSHong Zhang 3095a21f80fcSHong Zhang /*@ 3096a21f80fcSHong Zhang MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3097a21f80fcSHong Zhang 3098c3339decSBarry Smith Logically Collective 3099a21f80fcSHong Zhang 3100a21f80fcSHong Zhang Input Parameters: 310111a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3102a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFOG() 3103a21f80fcSHong Zhang 3104a21f80fcSHong Zhang Output Parameter: 3105a21f80fcSHong Zhang . val - value of MUMPS RINFOG(icntl) 3106a21f80fcSHong Zhang 3107a21f80fcSHong Zhang Level: beginner 3108a21f80fcSHong Zhang 310996a0c994SBarry Smith References: 3110606c0280SSatish Balay . 
* - MUMPS Users' Guide 3111a21f80fcSHong Zhang 31122ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3113a21f80fcSHong Zhang @*/ 3114d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3115d71ae5a4SJacob Faibussowitsch { 3116bc6112feSHong Zhang PetscFunctionBegin; 31172989dfd4SHong Zhang PetscValidType(F, 1); 311828b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3119bc6112feSHong Zhang PetscValidRealPointer(val, 3); 3120cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 31213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3122bc6112feSHong Zhang } 3123bc6112feSHong Zhang 31245c0bae8cSAshish Patel /*@ 31255c0bae8cSAshish Patel MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 31265c0bae8cSAshish Patel 31275c0bae8cSAshish Patel Logically Collective 31285c0bae8cSAshish Patel 31295c0bae8cSAshish Patel Input Parameter: 31305c0bae8cSAshish Patel . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 31315c0bae8cSAshish Patel 31325c0bae8cSAshish Patel Output Parameters: 31335c0bae8cSAshish Patel + size - local size of the array. The size of the array is non-zero only on the host. 31345c0bae8cSAshish Patel - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 31355c0bae8cSAshish Patel for freeing this array. 31365c0bae8cSAshish Patel 31375c0bae8cSAshish Patel Level: beginner 31385c0bae8cSAshish Patel 31395c0bae8cSAshish Patel References: 31405c0bae8cSAshish Patel . 
* - MUMPS Users' Guide 31415c0bae8cSAshish Patel 31425c0bae8cSAshish Patel .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 31435c0bae8cSAshish Patel @*/ 31445c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 31455c0bae8cSAshish Patel { 31465c0bae8cSAshish Patel PetscFunctionBegin; 31475c0bae8cSAshish Patel PetscValidType(F, 1); 31485c0bae8cSAshish Patel PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 31495c0bae8cSAshish Patel PetscValidIntPointer(size, 3); 31505c0bae8cSAshish Patel PetscValidPointer(array, 4); 31515c0bae8cSAshish Patel PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 31525c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 31535c0bae8cSAshish Patel } 31545c0bae8cSAshish Patel 315524b6179bSKris Buschelman /*MC 31562692d6eeSBarry Smith MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 315724b6179bSKris Buschelman distributed and sequential matrices via the external package MUMPS. 315824b6179bSKris Buschelman 315911a5261eSBarry Smith Works with `MATAIJ` and `MATSBAIJ` matrices 316024b6179bSKris Buschelman 3161c2b89b5dSBarry Smith Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3162c2b89b5dSBarry Smith 31632ef1f0ffSBarry Smith Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 31642ef1f0ffSBarry Smith See details below. 

  Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver

  Options Database Keys:
+ -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
. -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
. -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
. -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
. -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
. -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
                       Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
. -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
. -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
. -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
. -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
. -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
. -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
. -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
. -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
.
-mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
. -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
. -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
. -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
. -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
. -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
. -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
. -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
. -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
. -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
. -mat_mumps_icntl_33 - ICNTL(33): compute determinant
. -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
. -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
. -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
. -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
. -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
. -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
. -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.
-mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
. -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
- -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
                                   Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

  Level: beginner

  Notes:
  MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will
  error if the matrix is Hermitian.

  When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
  `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

  When a MUMPS factorization fails inside a `KSP` solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
  the failure with
.vb
          KSPGetPC(ksp,&pc);
          PCFactorGetMatrix(pc,&mat);
          MatMumpsGetInfo(mat,....);
          MatMumpsGetInfog(mat,....); etc.
.ve
  Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.

  MUMPS provides 64-bit integer support in two build modes:
  full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
  requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

  selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
  MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and
  columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
  integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

  Two modes to run MUMPS/PETSc with OpenMP
.vb
     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
.ve

.vb
     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores.
For example,
     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
.ve

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
  (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
  (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
  libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
  (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

  If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
  processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
  size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
  are called slave ranks (or slaves). Only master ranks are seen by MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
  by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
  In a multi-socket compute node, MPI rank mapping is an issue.
Continuing the above example, suppose your compute node has two sockets.
  If you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
  MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half of the cores in socket 0, and half
  of the cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
  problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
  For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
  examine the mapping result.

  PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
  for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
  calls `omp_set_num_threads`(m) internally before calling MUMPS.

  References:
+ * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
- * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3274217d3b1eSJunchao Zhang 32752ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 327624b6179bSKris Buschelman M*/ 327724b6179bSKris Buschelman 3278d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3279d71ae5a4SJacob Faibussowitsch { 328035bd34faSBarry Smith PetscFunctionBegin; 32812692d6eeSBarry Smith *type = MATSOLVERMUMPS; 32823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 328335bd34faSBarry Smith } 328435bd34faSBarry Smith 3285bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI AIJ matrices */ 3286d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3287d71ae5a4SJacob Faibussowitsch { 32882877fffaSHong Zhang Mat B; 32892877fffaSHong Zhang Mat_MUMPS *mumps; 3290ace3abfcSBarry Smith PetscBool isSeqAIJ; 32912c7c0729SBarry Smith PetscMPIInt size; 32922877fffaSHong Zhang 32932877fffaSHong Zhang PetscFunctionBegin; 3294eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 3295*03e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3296*03e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3297*03e5aca4SStefano Zampini *F = NULL; 3298*03e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 3299*03e5aca4SStefano Zampini } 3300eb1ec7c1SStefano Zampini #endif 33012877fffaSHong Zhang /* Create the factorization matrix */ 33029566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 33039566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 33049566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 33059566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 33069566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 33072877fffaSHong Zhang 33084dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 33092205254eSKarl Rupp 33102877fffaSHong Zhang B->ops->view = MatView_MUMPS; 331135bd34faSBarry Smith B->ops->getinfo = MatGetInfo_MUMPS; 33122205254eSKarl Rupp 33139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 33149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 33159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 33169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 33179566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 33189566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 33199566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 33209566063dSJacob 
Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 33219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 33229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 33239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 33245c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 33259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 33269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 33276444a565SStefano Zampini 3328450b117fSShri Abhyankar if (ftype == MAT_FACTOR_LU) { 3329450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3330d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 3331bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3332bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 33339566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3334746480a1SHong Zhang mumps->sym = 0; 3335dcd589f8SShri Abhyankar } else { 333667877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3337450b117fSShri Abhyankar B->factortype = MAT_FACTOR_CHOLESKY; 3338bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3339bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 33409566063dSJacob Faibussowitsch 
PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 334159ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 334259ac8732SStefano Zampini mumps->sym = 2; 334359ac8732SStefano Zampini #else 3344b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 33456fdc2a6dSBarry Smith else mumps->sym = 2; 334659ac8732SStefano Zampini #endif 3347450b117fSShri Abhyankar } 33482877fffaSHong Zhang 334900c67f3bSHong Zhang /* set solvertype */ 33509566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 33519566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 33529566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 33532c7c0729SBarry Smith if (size == 1) { 33544ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3355f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 33562c7c0729SBarry Smith } 33572877fffaSHong Zhang B->ops->destroy = MatDestroy_MUMPS; 3358e69c285eSBarry Smith B->data = (void *)mumps; 33592205254eSKarl Rupp 33602877fffaSHong Zhang *F = B; 3361413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3362413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3363413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3364d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 33653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 33662877fffaSHong Zhang } 33672877fffaSHong Zhang 3368bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3369d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3370d71ae5a4SJacob Faibussowitsch { 33712877fffaSHong Zhang Mat B; 33722877fffaSHong Zhang Mat_MUMPS *mumps; 3373ace3abfcSBarry Smith PetscBool isSeqSBAIJ; 33742c7c0729SBarry Smith PetscMPIInt size; 33752877fffaSHong Zhang 33762877fffaSHong Zhang 
PetscFunctionBegin; 3377eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 3378*03e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3379*03e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3380*03e5aca4SStefano Zampini *F = NULL; 3381*03e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 3382*03e5aca4SStefano Zampini } 3383eb1ec7c1SStefano Zampini #endif 33849566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 33859566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 33869566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 33879566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3388e69c285eSBarry Smith 33894dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 33909566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3391bccb9932SShri Abhyankar if (isSeqSBAIJ) { 339216ebf90aSShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3393dcd589f8SShri Abhyankar } else { 3394bccb9932SShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3395bccb9932SShri Abhyankar } 3396bccb9932SShri Abhyankar 339767877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3398bccb9932SShri Abhyankar B->ops->view = MatView_MUMPS; 3399722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 34002205254eSKarl Rupp 34019566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 34029566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 34039566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 34049566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 34059566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 34069566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 34079566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 34089566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 34099566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 34109566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 34119566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 34125c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 34139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 34149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 34152205254eSKarl Rupp 3416f4762488SHong Zhang B->factortype = MAT_FACTOR_CHOLESKY; 341759ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 341859ac8732SStefano Zampini mumps->sym = 2; 341959ac8732SStefano Zampini #else 3420b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 34216fdc2a6dSBarry Smith else mumps->sym = 2; 
342259ac8732SStefano Zampini #endif 3423a214ac2aSShri Abhyankar 342400c67f3bSHong Zhang /* set solvertype */ 34259566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 34269566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 34279566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 34282c7c0729SBarry Smith if (size == 1) { 34294ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3430f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 34312c7c0729SBarry Smith } 34329566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3433f3c0ef26SHong Zhang B->ops->destroy = MatDestroy_MUMPS; 3434e69c285eSBarry Smith B->data = (void *)mumps; 34352205254eSKarl Rupp 34362877fffaSHong Zhang *F = B; 3437413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3438413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3439413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3440d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 34413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34422877fffaSHong Zhang } 344397969023SHong Zhang 3444d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3445d71ae5a4SJacob Faibussowitsch { 344667877ebaSShri Abhyankar Mat B; 344767877ebaSShri Abhyankar Mat_MUMPS *mumps; 3448ace3abfcSBarry Smith PetscBool isSeqBAIJ; 34492c7c0729SBarry Smith PetscMPIInt size; 345067877ebaSShri Abhyankar 345167877ebaSShri Abhyankar PetscFunctionBegin; 345267877ebaSShri Abhyankar /* Create the factorization matrix */ 34539566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 34549566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 34559566063dSJacob Faibussowitsch 
PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 34569566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 34579566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3458450b117fSShri Abhyankar 34594dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 3460450b117fSShri Abhyankar if (ftype == MAT_FACTOR_LU) { 3461450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3462450b117fSShri Abhyankar B->factortype = MAT_FACTOR_LU; 3463bccb9932SShri Abhyankar if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3464bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3465746480a1SHong Zhang mumps->sym = 0; 34669566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3467546078acSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3468bccb9932SShri Abhyankar 3469450b117fSShri Abhyankar B->ops->view = MatView_MUMPS; 3470722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 34712205254eSKarl Rupp 34729566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 34739566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 34749566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 34759566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 34769566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 34779566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 34789566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 34799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 34809566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 34819566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 34829566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 34835c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 34849566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 34859566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3486450b117fSShri Abhyankar 348700c67f3bSHong Zhang /* set solvertype */ 34889566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 34899566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 34909566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 34912c7c0729SBarry Smith if (size == 1) { 34924ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3493f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 34942c7c0729SBarry Smith } 34957ee00b23SStefano Zampini B->ops->destroy = MatDestroy_MUMPS; 34967ee00b23SStefano Zampini B->data = (void *)mumps; 34977ee00b23SStefano Zampini 34987ee00b23SStefano Zampini *F = B; 
3499413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3500413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3501413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3502d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 35033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 35047ee00b23SStefano Zampini } 35057ee00b23SStefano Zampini 35067ee00b23SStefano Zampini /* MatGetFactor for Seq and MPI SELL matrices */ 3507d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3508d71ae5a4SJacob Faibussowitsch { 35097ee00b23SStefano Zampini Mat B; 35107ee00b23SStefano Zampini Mat_MUMPS *mumps; 35117ee00b23SStefano Zampini PetscBool isSeqSELL; 35122c7c0729SBarry Smith PetscMPIInt size; 35137ee00b23SStefano Zampini 35147ee00b23SStefano Zampini PetscFunctionBegin; 35157ee00b23SStefano Zampini /* Create the factorization matrix */ 35169566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 35179566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 35189566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 35199566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 35209566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 35217ee00b23SStefano Zampini 35224dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 35237ee00b23SStefano Zampini 35247ee00b23SStefano Zampini B->ops->view = MatView_MUMPS; 35257ee00b23SStefano Zampini B->ops->getinfo = MatGetInfo_MUMPS; 35267ee00b23SStefano Zampini 35279566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 35289566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 35299566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 35309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 35319566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 35329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 35339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 35349566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 35359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 35369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 35379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 35385c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 35397ee00b23SStefano Zampini 35407ee00b23SStefano Zampini if (ftype == MAT_FACTOR_LU) { 35417ee00b23SStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 35427ee00b23SStefano Zampini B->factortype = MAT_FACTOR_LU; 35437ee00b23SStefano Zampini if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 35447ee00b23SStefano Zampini else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 35457ee00b23SStefano Zampini mumps->sym = 0; 35469566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 35477ee00b23SStefano Zampini } 
else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 35487ee00b23SStefano Zampini 35497ee00b23SStefano Zampini /* set solvertype */ 35509566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 35519566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 35529566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 35532c7c0729SBarry Smith if (size == 1) { 35544ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3555f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 35562c7c0729SBarry Smith } 3557450b117fSShri Abhyankar B->ops->destroy = MatDestroy_MUMPS; 3558e69c285eSBarry Smith B->data = (void *)mumps; 35592205254eSKarl Rupp 3560450b117fSShri Abhyankar *F = B; 3561413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 3562413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 3563413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 3564d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 35653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3566450b117fSShri Abhyankar } 356742c9c57cSBarry Smith 35689d0448ceSStefano Zampini /* MatGetFactor for MATNEST matrices */ 35699d0448ceSStefano Zampini static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 35709d0448ceSStefano Zampini { 35719d0448ceSStefano Zampini Mat B, **mats; 35729d0448ceSStefano Zampini Mat_MUMPS *mumps; 35739d0448ceSStefano Zampini PetscInt nr, nc; 35749d0448ceSStefano Zampini PetscMPIInt size; 3575*03e5aca4SStefano Zampini PetscBool flg = PETSC_TRUE; 35769d0448ceSStefano Zampini 35779d0448ceSStefano Zampini PetscFunctionBegin; 35789d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX) 3579*03e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3580*03e5aca4SStefano Zampini 
PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3581*03e5aca4SStefano Zampini *F = NULL; 3582*03e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 3583*03e5aca4SStefano Zampini } 35849d0448ceSStefano Zampini #endif 35859d0448ceSStefano Zampini 3586*03e5aca4SStefano Zampini /* Return if some condition is not satisfied */ 3587*03e5aca4SStefano Zampini *F = NULL; 35889d0448ceSStefano Zampini PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 35899d0448ceSStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY) { 35909d0448ceSStefano Zampini IS *rows, *cols; 35919d0448ceSStefano Zampini PetscInt *m, *M; 35929d0448ceSStefano Zampini 35939d0448ceSStefano Zampini PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 35949d0448ceSStefano Zampini PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 35959d0448ceSStefano Zampini PetscCall(MatNestGetISs(A, rows, cols)); 35969d0448ceSStefano Zampini for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3597*03e5aca4SStefano Zampini if (!flg) { 3598*03e5aca4SStefano Zampini PetscCall(PetscFree2(rows, cols)); 3599*03e5aca4SStefano Zampini PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. 
Use MAT_FACTOR_LU.\n")); 3600*03e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 3601*03e5aca4SStefano Zampini } 36029d0448ceSStefano Zampini PetscCall(PetscMalloc2(nr, &m, nr, &M)); 36039d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 36049d0448ceSStefano Zampini for (PetscInt r = 0; flg && r < nr; r++) 36059d0448ceSStefano Zampini for (PetscInt k = r + 1; flg && k < nr; k++) 36069d0448ceSStefano Zampini if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 36079d0448ceSStefano Zampini PetscCall(PetscFree2(m, M)); 36089d0448ceSStefano Zampini PetscCall(PetscFree2(rows, cols)); 3609*03e5aca4SStefano Zampini if (!flg) { 3610*03e5aca4SStefano Zampini PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 3611*03e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 3612*03e5aca4SStefano Zampini } 36139d0448ceSStefano Zampini } 36149d0448ceSStefano Zampini 36159d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 36169d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 36179d0448ceSStefano Zampini Mat sub = mats[r][c]; 36185d955bbbSStefano Zampini PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans; 36199d0448ceSStefano Zampini 36209d0448ceSStefano Zampini if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 36215d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 36225d955bbbSStefano Zampini if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 36235d955bbbSStefano Zampini else { 36245d955bbbSStefano Zampini PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 36255d955bbbSStefano Zampini if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 36265d955bbbSStefano Zampini } 36279d0448ceSStefano Zampini 
PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 36289d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 36299d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 36309d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 36319d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 36329d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 36339d0448ceSStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY) { 3634*03e5aca4SStefano Zampini if (r == c && !isSeqAIJ && !isMPIAIJ && !isSeqSBAIJ && !isMPISBAIJ) { 3635*03e5aca4SStefano Zampini PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3636*03e5aca4SStefano Zampini flg = PETSC_FALSE; 3637*03e5aca4SStefano Zampini } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3638*03e5aca4SStefano Zampini PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3639*03e5aca4SStefano Zampini flg = PETSC_FALSE; 3640*03e5aca4SStefano Zampini } 3641*03e5aca4SStefano Zampini } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3642*03e5aca4SStefano Zampini PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3643*03e5aca4SStefano Zampini flg = PETSC_FALSE; 36449d0448ceSStefano Zampini } 36459d0448ceSStefano Zampini } 3646*03e5aca4SStefano Zampini } 3647*03e5aca4SStefano Zampini if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 36489d0448ceSStefano Zampini 36499d0448ceSStefano Zampini /* Create the factorization matrix */ 36509d0448ceSStefano Zampini PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 
36519d0448ceSStefano Zampini PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 36529d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 36539d0448ceSStefano Zampini PetscCall(MatSetUp(B)); 36549d0448ceSStefano Zampini 36559d0448ceSStefano Zampini PetscCall(PetscNew(&mumps)); 36569d0448ceSStefano Zampini 36579d0448ceSStefano Zampini B->ops->view = MatView_MUMPS; 36589d0448ceSStefano Zampini B->ops->getinfo = MatGetInfo_MUMPS; 36599d0448ceSStefano Zampini 36609d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 36619d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 36629d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 36639d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 36649d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 36659d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 36669d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 36679d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 36689d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 36699d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 36709d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", 
MatMumpsGetRinfog_MUMPS)); 36719d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 36729d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 36739d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 36749d0448ceSStefano Zampini 36759d0448ceSStefano Zampini if (ftype == MAT_FACTOR_LU) { 36769d0448ceSStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 36779d0448ceSStefano Zampini B->factortype = MAT_FACTOR_LU; 36789d0448ceSStefano Zampini mumps->sym = 0; 36799d0448ceSStefano Zampini } else { 36809d0448ceSStefano Zampini B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 36819d0448ceSStefano Zampini B->factortype = MAT_FACTOR_CHOLESKY; 36829d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX) 36839d0448ceSStefano Zampini mumps->sym = 2; 36849d0448ceSStefano Zampini #else 36859d0448ceSStefano Zampini if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 36869d0448ceSStefano Zampini else mumps->sym = 2; 36879d0448ceSStefano Zampini #endif 36889d0448ceSStefano Zampini } 36899d0448ceSStefano Zampini mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 36909d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 36919d0448ceSStefano Zampini 36929d0448ceSStefano Zampini PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 36939d0448ceSStefano Zampini if (size == 1) { 36949d0448ceSStefano Zampini /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 36959d0448ceSStefano Zampini B->canuseordering = PETSC_TRUE; 36969d0448ceSStefano Zampini } 36979d0448ceSStefano Zampini 36989d0448ceSStefano Zampini /* set solvertype */ 36999d0448ceSStefano Zampini 
PetscCall(PetscFree(B->solvertype)); 37009d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 37019d0448ceSStefano Zampini B->ops->destroy = MatDestroy_MUMPS; 37029d0448ceSStefano Zampini B->data = (void *)mumps; 37039d0448ceSStefano Zampini 37049d0448ceSStefano Zampini *F = B; 37059d0448ceSStefano Zampini mumps->id.job = JOB_NULL; 37069d0448ceSStefano Zampini mumps->ICNTL_pre = NULL; 37079d0448ceSStefano Zampini mumps->CNTL_pre = NULL; 37089d0448ceSStefano Zampini mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 37099d0448ceSStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 37109d0448ceSStefano Zampini } 37119d0448ceSStefano Zampini
/*
  MatSolverTypeRegister_MUMPS - Registers the MATSOLVERMUMPS package by calling
  MatSolverTypeRegister() once for every supported (matrix type, factorization type) pair.

  The table below lists each pair together with the MatGetFactor_*_mumps() routine that is
  registered for it:
    - MPIAIJ, SEQAIJ, MPIBAIJ, SEQBAIJ and NEST are registered for both LU and Cholesky;
    - MPISBAIJ and SEQSBAIJ are registered for Cholesky only;
    - SEQSELL is registered for LU only.

  Entries are registered in table order. A failing registration is propagated to the caller
  immediately through PetscCall(), so later entries are not registered in that case.
*/
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  static const struct {
    MatType       mtype;                                     /* matrix type the factory applies to */
    MatFactorType ftype;                                     /* factorization kind being offered */
    PetscErrorCode (*getfactor)(Mat, MatFactorType, Mat *); /* routine handed to MatSolverTypeRegister() */
  } registrations[] = {
    {MATMPIAIJ,   MAT_FACTOR_LU,       MatGetFactor_aij_mumps  },
    {MATMPIAIJ,   MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps  },
    {MATMPIBAIJ,  MAT_FACTOR_LU,       MatGetFactor_baij_mumps },
    {MATMPIBAIJ,  MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps },
    {MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps},
    {MATSEQAIJ,   MAT_FACTOR_LU,       MatGetFactor_aij_mumps  },
    {MATSEQAIJ,   MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps  },
    {MATSEQBAIJ,  MAT_FACTOR_LU,       MatGetFactor_baij_mumps },
    {MATSEQBAIJ,  MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps },
    {MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps},
    {MATSEQSELL,  MAT_FACTOR_LU,       MatGetFactor_sell_mumps },
    {MATNEST,     MAT_FACTOR_LU,       MatGetFactor_nest_mumps },
    {MATNEST,     MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps }
  };
  const size_t nregistrations = sizeof(registrations) / sizeof(registrations[0]);

  PetscFunctionBegin;
  for (size_t k = 0; k < nregistrations; k++) PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, registrations[k].mtype, registrations[k].ftype, registrations[k].getfactor));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3730