1397b6df1SKris Buschelman /* 2c2b5dc30SHong Zhang Provides an interface to the MUMPS sparse solver 3397b6df1SKris Buschelman */ 467602552SJunchao Zhang #include <petscpkg_version.h> 59d0448ceSStefano Zampini #include <petscsf.h> 6c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 87ee00b23SStefano Zampini #include <../src/mat/impls/sell/mpi/mpisell.h> 9cf053153SJunchao Zhang #include <petsc/private/vecimpl.h> 10397b6df1SKris Buschelman 119261f6e4SBarry Smith #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 129261f6e4SBarry Smith 13397b6df1SKris Buschelman EXTERN_C_BEGIN 14cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 15cf053153SJunchao Zhang #include <cmumps_c.h> 16cf053153SJunchao Zhang #include <zmumps_c.h> 17cf053153SJunchao Zhang #include <smumps_c.h> 18cf053153SJunchao Zhang #include <dmumps_c.h> 19cf053153SJunchao Zhang #else 20397b6df1SKris Buschelman #if defined(PETSC_USE_COMPLEX) 212907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 222907cef9SHong Zhang #include <cmumps_c.h> 23cf053153SJunchao Zhang #define MUMPS_c cmumps_c 24cf053153SJunchao Zhang #define MumpsScalar CMUMPS_COMPLEX 252907cef9SHong Zhang #else 26c6db04a5SJed Brown #include <zmumps_c.h> 27cf053153SJunchao Zhang #define MUMPS_c zmumps_c 28cf053153SJunchao Zhang #define MumpsScalar ZMUMPS_COMPLEX 292907cef9SHong Zhang #endif 302907cef9SHong Zhang #else 312907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 322907cef9SHong Zhang #include <smumps_c.h> 33cf053153SJunchao Zhang #define MUMPS_c smumps_c 34cf053153SJunchao Zhang #define MumpsScalar SMUMPS_REAL 35397b6df1SKris Buschelman #else 36c6db04a5SJed Brown #include <dmumps_c.h> 37cf053153SJunchao Zhang #define MUMPS_c dmumps_c 38cf053153SJunchao Zhang #define MumpsScalar DMUMPS_REAL 39cf053153SJunchao Zhang #endif 40397b6df1SKris Buschelman 
#endif 412907cef9SHong Zhang #endif 42*d77aa1b8SPierre Jolivet #if defined(PETSC_USE_COMPLEX) 43*d77aa1b8SPierre Jolivet #if defined(PETSC_USE_REAL_SINGLE) 44*d77aa1b8SPierre Jolivet #define MUMPS_STRUC_C CMUMPS_STRUC_C 45*d77aa1b8SPierre Jolivet #else 46*d77aa1b8SPierre Jolivet #define MUMPS_STRUC_C ZMUMPS_STRUC_C 47*d77aa1b8SPierre Jolivet #endif 48*d77aa1b8SPierre Jolivet #else 49*d77aa1b8SPierre Jolivet #if defined(PETSC_USE_REAL_SINGLE) 50*d77aa1b8SPierre Jolivet #define MUMPS_STRUC_C SMUMPS_STRUC_C 51*d77aa1b8SPierre Jolivet #else 52*d77aa1b8SPierre Jolivet #define MUMPS_STRUC_C DMUMPS_STRUC_C 53*d77aa1b8SPierre Jolivet #endif 54*d77aa1b8SPierre Jolivet #endif 55397b6df1SKris Buschelman EXTERN_C_END 56cf053153SJunchao Zhang 57397b6df1SKris Buschelman #define JOB_INIT -1 58413bcc21SPierre Jolivet #define JOB_NULL 0 593d472b54SHong Zhang #define JOB_FACTSYMBOLIC 1 603d472b54SHong Zhang #define JOB_FACTNUMERIC 2 613d472b54SHong Zhang #define JOB_SOLVE 3 62397b6df1SKris Buschelman #define JOB_END -2 633d472b54SHong Zhang 64a6053eceSJunchao Zhang /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 65a6053eceSJunchao Zhang number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 66a6053eceSJunchao Zhang naming convention in PetscMPIInt, PetscBLASInt etc. 
67a6053eceSJunchao Zhang */ 68a6053eceSJunchao Zhang typedef MUMPS_INT PetscMUMPSInt; 69a6053eceSJunchao Zhang 7067602552SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 7167602552SJunchao Zhang #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 72f0b74427SPierre Jolivet #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 7367602552SJunchao Zhang #endif 74a6053eceSJunchao Zhang #else 7567602552SJunchao Zhang #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 76f0b74427SPierre Jolivet #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 7767602552SJunchao Zhang #endif 7867602552SJunchao Zhang #endif 7967602552SJunchao Zhang 80a6053eceSJunchao Zhang #define MPIU_MUMPSINT MPI_INT 81a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MAX 2147483647 82a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MIN -2147483648 83a6053eceSJunchao Zhang 84a6053eceSJunchao Zhang /* Cast PetscInt to PetscMUMPSInt. 
Usually there is no overflow since <a> is row/col indices or some small integers*/ 856497c311SBarry Smith static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b) 86d71ae5a4SJacob Faibussowitsch { 87a6053eceSJunchao Zhang PetscFunctionBegin; 88ece88022SPierre Jolivet #if PetscDefined(USE_64BIT_INDICES) 892c71b3e2SJacob Faibussowitsch PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 90ece88022SPierre Jolivet #endif 9157508eceSPierre Jolivet *b = (PetscMUMPSInt)a; 923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 93a6053eceSJunchao Zhang } 94a6053eceSJunchao Zhang 95a6053eceSJunchao Zhang /* Put these utility routines here since they are only used in this file */ 96ce78bad3SBarry Smith static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 97d71ae5a4SJacob Faibussowitsch { 98a6053eceSJunchao Zhang PetscInt myval; 99a6053eceSJunchao Zhang PetscBool myset; 1004d86920dSPierre Jolivet 101a6053eceSJunchao Zhang PetscFunctionBegin; 102a6053eceSJunchao Zhang /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 1039566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 1049566063dSJacob Faibussowitsch if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 105a6053eceSJunchao Zhang if (set) *set = myset; 1063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 107a6053eceSJunchao Zhang } 108a6053eceSJunchao Zhang #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 109a6053eceSJunchao Zhang 110cf053153SJunchao Zhang // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C. 111cf053153SJunchao Zhang // 112cf053153SJunchao Zhang // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type. 113cf053153SJunchao Zhang // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size 114cf053153SJunchao Zhang // arrays within the types are directly linked. At the end, we only need to copy ~20 intergers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall(). 115cf053153SJunchao Zhang // 116cf053153SJunchao Zhang // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface. 117cf053153SJunchao Zhang // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the the actual precision to be used. 118cf053153SJunchao Zhang // Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers. 
119cf053153SJunchao Zhang typedef struct { 120cf053153SJunchao Zhang PetscPrecision precision; // precision used by MUMPS 121cf053153SJunchao Zhang void *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex 122cf053153SJunchao Zhang 123cf053153SJunchao Zhang // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions. 124cf053153SJunchao Zhang MUMPS_INT sym, par, job; 125cf053153SJunchao Zhang MUMPS_INT comm_fortran; /* Fortran communicator */ 126cf053153SJunchao Zhang MUMPS_INT *icntl; 127cf053153SJunchao Zhang void *cntl; // MumpsReal, fixed size array 128cf053153SJunchao Zhang MUMPS_INT n; 129cf053153SJunchao Zhang MUMPS_INT nblk; 130cf053153SJunchao Zhang 131cf053153SJunchao Zhang /* Assembled entry */ 132cf053153SJunchao Zhang MUMPS_INT8 nnz; 133cf053153SJunchao Zhang MUMPS_INT *irn; 134cf053153SJunchao Zhang MUMPS_INT *jcn; 135cf053153SJunchao Zhang void *a; // MumpsScalar, centralized input 136cf053153SJunchao Zhang PetscCount a_len; 137cf053153SJunchao Zhang 138cf053153SJunchao Zhang /* Distributed entry */ 139cf053153SJunchao Zhang MUMPS_INT8 nnz_loc; 140cf053153SJunchao Zhang MUMPS_INT *irn_loc; 141cf053153SJunchao Zhang MUMPS_INT *jcn_loc; 142cf053153SJunchao Zhang void *a_loc; // MumpsScalar, distributed input 143cf053153SJunchao Zhang PetscCount a_loc_len; 144cf053153SJunchao Zhang 145cf053153SJunchao Zhang /* Matrix by blocks */ 146cf053153SJunchao Zhang MUMPS_INT *blkptr; 147cf053153SJunchao Zhang MUMPS_INT *blkvar; 148cf053153SJunchao Zhang 149cf053153SJunchao Zhang /* Ordering, if given by user */ 150cf053153SJunchao Zhang MUMPS_INT *perm_in; 151cf053153SJunchao Zhang 152cf053153SJunchao Zhang /* RHS, solution, ouptput data and statistics */ 153cf053153SJunchao Zhang void *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc; // MumpsScalar buffers 154cf053153SJunchao Zhang PetscCount rhs_len, 
redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar 155cf053153SJunchao Zhang 156cf053153SJunchao Zhang MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc; 1577096bf6aSJunchao Zhang MUMPS_INT nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc; 1587096bf6aSJunchao Zhang // MUMPS_INT nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0) 159cf053153SJunchao Zhang MUMPS_INT schur_lld; 160cf053153SJunchao Zhang MUMPS_INT *info, *infog; // fixed size array 161cf053153SJunchao Zhang void *rinfo, *rinfog; // MumpsReal, fixed size array 162cf053153SJunchao Zhang 163cf053153SJunchao Zhang /* Null space */ 164cf053153SJunchao Zhang MUMPS_INT *pivnul_list; // allocated by MUMPS! 165cf053153SJunchao Zhang MUMPS_INT *mapping; // allocated by MUMPS! 
166cf053153SJunchao Zhang 167cf053153SJunchao Zhang /* Schur */ 168cf053153SJunchao Zhang MUMPS_INT size_schur; 169cf053153SJunchao Zhang MUMPS_INT *listvar_schur; 170cf053153SJunchao Zhang void *schur; // MumpsScalar 171cf053153SJunchao Zhang PetscCount schur_len; 172cf053153SJunchao Zhang 173cf053153SJunchao Zhang /* For out-of-core */ 174cf053153SJunchao Zhang char *ooc_tmpdir; // fixed size array 175cf053153SJunchao Zhang char *ooc_prefix; // fixed size array 176cf053153SJunchao Zhang } XMUMPS_STRUC_C; 177cf053153SJunchao Zhang 178cf053153SJunchao Zhang // Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct 179cf053153SJunchao Zhang #define AllocatInternalID(MUMPS_STRUC_T, outer) \ 180cf053153SJunchao Zhang do { \ 181cf053153SJunchao Zhang MUMPS_STRUC_T *inner; \ 182cf053153SJunchao Zhang PetscCall(PetscNew(&inner)); \ 183cf053153SJunchao Zhang outer->icntl = inner->icntl; \ 184cf053153SJunchao Zhang outer->cntl = inner->cntl; \ 185cf053153SJunchao Zhang outer->info = inner->info; \ 186cf053153SJunchao Zhang outer->infog = inner->infog; \ 187cf053153SJunchao Zhang outer->rinfo = inner->rinfo; \ 188cf053153SJunchao Zhang outer->rinfog = inner->rinfog; \ 189cf053153SJunchao Zhang outer->ooc_tmpdir = inner->ooc_tmpdir; \ 190cf053153SJunchao Zhang outer->ooc_prefix = inner->ooc_prefix; \ 191cf053153SJunchao Zhang /* the three field should never change after init */ \ 192cf053153SJunchao Zhang inner->comm_fortran = outer->comm_fortran; \ 193cf053153SJunchao Zhang inner->par = outer->par; \ 194cf053153SJunchao Zhang inner->sym = outer->sym; \ 195cf053153SJunchao Zhang outer->internal_id = inner; \ 196cf053153SJunchao Zhang } while (0) 197cf053153SJunchao Zhang 198cf053153SJunchao Zhang // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner 199cf053153SJunchao Zhang static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision 
precision) 200cf053153SJunchao Zhang { 201cf053153SJunchao Zhang PetscFunctionBegin; 202cf053153SJunchao Zhang outer->precision = precision; 203cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 204cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 205cf053153SJunchao Zhang if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer); 206cf053153SJunchao Zhang else AllocatInternalID(ZMUMPS_STRUC_C, outer); 207cf053153SJunchao Zhang #else 208cf053153SJunchao Zhang if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer); 209cf053153SJunchao Zhang else AllocatInternalID(DMUMPS_STRUC_C, outer); 210cf053153SJunchao Zhang #endif 211cf053153SJunchao Zhang #else 212cf053153SJunchao Zhang AllocatInternalID(MUMPS_STRUC_C, outer); 213cf053153SJunchao Zhang #endif 214cf053153SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 215cf053153SJunchao Zhang } 216cf053153SJunchao Zhang 217cf053153SJunchao Zhang #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \ 218cf053153SJunchao Zhang do { \ 219cf053153SJunchao Zhang MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \ 220cf053153SJunchao Zhang PetscCall(PetscFree(inner->a)); \ 221cf053153SJunchao Zhang PetscCall(PetscFree(inner->a_loc)); \ 222cf053153SJunchao Zhang PetscCall(PetscFree(inner->redrhs)); \ 223cf053153SJunchao Zhang PetscCall(PetscFree(inner->rhs)); \ 224cf053153SJunchao Zhang PetscCall(PetscFree(inner->rhs_sparse)); \ 225cf053153SJunchao Zhang PetscCall(PetscFree(inner->rhs_loc)); \ 226cf053153SJunchao Zhang PetscCall(PetscFree(inner->sol_loc)); \ 227cf053153SJunchao Zhang PetscCall(PetscFree(inner->schur)); \ 228cf053153SJunchao Zhang } while (0) 229cf053153SJunchao Zhang 230cf053153SJunchao Zhang static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer) 231cf053153SJunchao Zhang { 232cf053153SJunchao Zhang PetscFunctionBegin; 233cf053153SJunchao Zhang if (outer->internal_id) { // sometimes, the inner is never created 
before we destroy the outer 234cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 235cf053153SJunchao Zhang const PetscPrecision mumps_precision = outer->precision; 236cf053153SJunchao Zhang if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision 237cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 238cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer); 239cf053153SJunchao Zhang else FreeInternalIDFields(ZMUMPS_STRUC_C, outer); 240cf053153SJunchao Zhang #else 241cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer); 242cf053153SJunchao Zhang else FreeInternalIDFields(DMUMPS_STRUC_C, outer); 243cf053153SJunchao Zhang #endif 244cf053153SJunchao Zhang } 245cf053153SJunchao Zhang #endif 246cf053153SJunchao Zhang PetscCall(PetscFree(outer->internal_id)); 247cf053153SJunchao Zhang } 248cf053153SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 249cf053153SJunchao Zhang } 250cf053153SJunchao Zhang 251cf053153SJunchao Zhang // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>. 252cf053153SJunchao Zhang // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array. 253cf053153SJunchao Zhang // (For bravity, we use <ma> for array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>) 254cf053153SJunchao Zhang // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request. 255cf053153SJunchao Zhang // 256cf053153SJunchao Zhang // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same. 
257cf053153SJunchao Zhang // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output. 258cf053153SJunchao Zhang // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output. 259cf053153SJunchao Zhang // 260cf053153SJunchao Zhang // 261cf053153SJunchao Zhang // Input parameters: 262cf053153SJunchao Zhang // + convert - whether to do PetscScalar to MumpsScalar conversion 263cf053153SJunchao Zhang // . n - length of the PetscScalar array 264cf053153SJunchao Zhang // . pa - [n]], points to the PetscScalar array 265cf053153SJunchao Zhang // . precision - precision of MumpsScalar 266cf053153SJunchao Zhang // . m - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero. 267cf053153SJunchao Zhang // - ma - on input, an existing MumpsScalar array if any. 268cf053153SJunchao Zhang // 269cf053153SJunchao Zhang // Output parameters: 270cf053153SJunchao Zhang // + m - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0 271cf053153SJunchao Zhang // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same. 272cf053153SJunchao Zhang // 273cf053153SJunchao Zhang // Note: 274cf053153SJunchao Zhang // New memory, if allocated, is done via PetscMalloc1(), and is owned by caller. 275cf053153SJunchao Zhang static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma) 276cf053153SJunchao Zhang { 277cf053153SJunchao Zhang PetscFunctionBegin; 278cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 279cf053153SJunchao Zhang const PetscPrecision mumps_precision = precision; 280cf053153SJunchao Zhang PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precicison (%d). 
Must be single or double", (int)precision); 281cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 282cf053153SJunchao Zhang if (mumps_precision != PETSC_SCALAR_PRECISION) { 283cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) { 284cf053153SJunchao Zhang if (*m < n) { 285cf053153SJunchao Zhang PetscCall(PetscFree(*ma)); 286cf053153SJunchao Zhang PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma)); 287cf053153SJunchao Zhang *m = n; 288cf053153SJunchao Zhang } 289cf053153SJunchao Zhang if (convert) { 290cf053153SJunchao Zhang CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma; 291cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) { 292cf053153SJunchao Zhang b[i].r = PetscRealPart(pa[i]); 293cf053153SJunchao Zhang b[i].i = PetscImaginaryPart(pa[i]); 2942893035cSJunchao Zhang } 295cf053153SJunchao Zhang } 296cf053153SJunchao Zhang } else { 297cf053153SJunchao Zhang if (*m < n) { 298cf053153SJunchao Zhang PetscCall(PetscFree(*ma)); 299cf053153SJunchao Zhang PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma)); 300cf053153SJunchao Zhang *m = n; 301cf053153SJunchao Zhang } 302cf053153SJunchao Zhang if (convert) { 303cf053153SJunchao Zhang ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma; 304cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) { 305cf053153SJunchao Zhang b[i].r = PetscRealPart(pa[i]); 306cf053153SJunchao Zhang b[i].i = PetscImaginaryPart(pa[i]); 307cf053153SJunchao Zhang } 308cf053153SJunchao Zhang } 309cf053153SJunchao Zhang } 310cf053153SJunchao Zhang } 311cf053153SJunchao Zhang #else 312cf053153SJunchao Zhang if (mumps_precision != PETSC_SCALAR_PRECISION) { 313cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) { 314cf053153SJunchao Zhang if (*m < n) { 315cf053153SJunchao Zhang PetscCall(PetscFree(*ma)); 316cf053153SJunchao Zhang PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma)); 317cf053153SJunchao Zhang *m = n; 318cf053153SJunchao Zhang } 319cf053153SJunchao Zhang if (convert) { 320cf053153SJunchao Zhang SMUMPS_REAL *b = 
*(SMUMPS_REAL **)ma; 321cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 322cf053153SJunchao Zhang } 323cf053153SJunchao Zhang } else { 324cf053153SJunchao Zhang if (*m < n) { 325cf053153SJunchao Zhang PetscCall(PetscFree(*ma)); 326cf053153SJunchao Zhang PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma)); 327cf053153SJunchao Zhang *m = n; 328cf053153SJunchao Zhang } 329cf053153SJunchao Zhang if (convert) { 330cf053153SJunchao Zhang DMUMPS_REAL *b = *(DMUMPS_REAL **)ma; 331cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 332cf053153SJunchao Zhang } 333cf053153SJunchao Zhang } 334cf053153SJunchao Zhang } 335cf053153SJunchao Zhang #endif 336cf053153SJunchao Zhang else 337cf053153SJunchao Zhang #endif 338cf053153SJunchao Zhang { 339cf053153SJunchao Zhang if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any 340cf053153SJunchao Zhang *ma = (void *)pa; // same precision, make them alias 341cf053153SJunchao Zhang *m = 0; 342cf053153SJunchao Zhang } 343cf053153SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 344cf053153SJunchao Zhang } 345cf053153SJunchao Zhang 346cf053153SJunchao Zhang // Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>. 347cf053153SJunchao Zhang // 348cf053153SJunchao Zhang // 1) If the two types are different, cast array elements. 349cf053153SJunchao Zhang // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 
350cf053153SJunchao Zhang static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa) 351cf053153SJunchao Zhang { 352cf053153SJunchao Zhang PetscFunctionBegin; 353cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 354cf053153SJunchao Zhang if (mumps_precision != PETSC_SCALAR_PRECISION) { 355cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 356cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) { 357cf053153SJunchao Zhang PetscReal *a = (PetscReal *)pa; 358cf053153SJunchao Zhang const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 359cf053153SJunchao Zhang for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 360cf053153SJunchao Zhang } else { 361cf053153SJunchao Zhang PetscReal *a = (PetscReal *)pa; 362cf053153SJunchao Zhang const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 363cf053153SJunchao Zhang for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 364cf053153SJunchao Zhang } 365cf053153SJunchao Zhang #else 366cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) { 367cf053153SJunchao Zhang const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 368cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 369cf053153SJunchao Zhang } else { 370cf053153SJunchao Zhang const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 371cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 372cf053153SJunchao Zhang } 373cf053153SJunchao Zhang #endif 374cf053153SJunchao Zhang } else 375cf053153SJunchao Zhang #endif 376cf053153SJunchao Zhang PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n)); 377cf053153SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 378cf053153SJunchao Zhang } 379cf053153SJunchao Zhang 380cf053153SJunchao Zhang // Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>. 
381cf053153SJunchao Zhang // 382cf053153SJunchao Zhang // 1) If the two types are different, cast array elements. 383cf053153SJunchao Zhang // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 384cf053153SJunchao Zhang static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma) 385cf053153SJunchao Zhang { 386cf053153SJunchao Zhang PetscFunctionBegin; 387cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 388cf053153SJunchao Zhang if (mumps_precision != PETSC_SCALAR_PRECISION) { 389cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 390cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) { 391cf053153SJunchao Zhang CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma; 392cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) { 393cf053153SJunchao Zhang b[i].r = PetscRealPart(pa[i]); 394cf053153SJunchao Zhang b[i].i = PetscImaginaryPart(pa[i]); 395cf053153SJunchao Zhang } 396cf053153SJunchao Zhang } else { 397cf053153SJunchao Zhang ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma; 398cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) { 399cf053153SJunchao Zhang b[i].r = PetscRealPart(pa[i]); 400cf053153SJunchao Zhang b[i].i = PetscImaginaryPart(pa[i]); 401cf053153SJunchao Zhang } 402cf053153SJunchao Zhang } 403cf053153SJunchao Zhang #else 404cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) { 405cf053153SJunchao Zhang SMUMPS_REAL *b = (SMUMPS_REAL *)ma; 406cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 407cf053153SJunchao Zhang } else { 408cf053153SJunchao Zhang DMUMPS_REAL *b = (DMUMPS_REAL *)ma; 409cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 410cf053153SJunchao Zhang } 411cf053153SJunchao Zhang #endif 412cf053153SJunchao Zhang } else 413cf053153SJunchao Zhang #endif 414cf053153SJunchao Zhang PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar 
*)pa, n)); 415cf053153SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 416cf053153SJunchao Zhang } 417cf053153SJunchao Zhang 418cf053153SJunchao Zhang static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id) 419cf053153SJunchao Zhang { 420cf053153SJunchao Zhang return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT; 421cf053153SJunchao Zhang } 422cf053153SJunchao Zhang 423cf053153SJunchao Zhang #define PreMumpsCall(inner, outer, mumpsscalar) \ 424cf053153SJunchao Zhang do { \ 425cf053153SJunchao Zhang inner->job = outer->job; \ 426cf053153SJunchao Zhang inner->n = outer->n; \ 427cf053153SJunchao Zhang inner->nblk = outer->nblk; \ 428cf053153SJunchao Zhang inner->nnz = outer->nnz; \ 429cf053153SJunchao Zhang inner->irn = outer->irn; \ 430cf053153SJunchao Zhang inner->jcn = outer->jcn; \ 431cf053153SJunchao Zhang inner->a = (mumpsscalar *)outer->a; \ 432cf053153SJunchao Zhang inner->nnz_loc = outer->nnz_loc; \ 433cf053153SJunchao Zhang inner->irn_loc = outer->irn_loc; \ 434cf053153SJunchao Zhang inner->jcn_loc = outer->jcn_loc; \ 435cf053153SJunchao Zhang inner->a_loc = (mumpsscalar *)outer->a_loc; \ 436cf053153SJunchao Zhang inner->blkptr = outer->blkptr; \ 437cf053153SJunchao Zhang inner->blkvar = outer->blkvar; \ 438cf053153SJunchao Zhang inner->perm_in = outer->perm_in; \ 439cf053153SJunchao Zhang inner->rhs = (mumpsscalar *)outer->rhs; \ 440cf053153SJunchao Zhang inner->redrhs = (mumpsscalar *)outer->redrhs; \ 441cf053153SJunchao Zhang inner->rhs_sparse = (mumpsscalar *)outer->rhs_sparse; \ 442cf053153SJunchao Zhang inner->sol_loc = (mumpsscalar *)outer->sol_loc; \ 443cf053153SJunchao Zhang inner->rhs_loc = (mumpsscalar *)outer->rhs_loc; \ 444cf053153SJunchao Zhang inner->irhs_sparse = outer->irhs_sparse; \ 445cf053153SJunchao Zhang inner->irhs_ptr = outer->irhs_ptr; \ 446cf053153SJunchao Zhang inner->isol_loc = outer->isol_loc; \ 447cf053153SJunchao Zhang inner->irhs_loc = outer->irhs_loc; \ 448cf053153SJunchao Zhang inner->nrhs = 
outer->nrhs; \ 449cf053153SJunchao Zhang inner->lrhs = outer->lrhs; \ 450cf053153SJunchao Zhang inner->lredrhs = outer->lredrhs; \ 451cf053153SJunchao Zhang inner->nz_rhs = outer->nz_rhs; \ 452cf053153SJunchao Zhang inner->lsol_loc = outer->lsol_loc; \ 453cf053153SJunchao Zhang inner->nloc_rhs = outer->nloc_rhs; \ 454cf053153SJunchao Zhang inner->lrhs_loc = outer->lrhs_loc; \ 455cf053153SJunchao Zhang inner->schur_lld = outer->schur_lld; \ 456cf053153SJunchao Zhang inner->size_schur = outer->size_schur; \ 457cf053153SJunchao Zhang inner->listvar_schur = outer->listvar_schur; \ 458cf053153SJunchao Zhang inner->schur = (mumpsscalar *)outer->schur; \ 459cf053153SJunchao Zhang } while (0) 460cf053153SJunchao Zhang 461cf053153SJunchao Zhang #define PostMumpsCall(inner, outer) \ 462cf053153SJunchao Zhang do { \ 463cf053153SJunchao Zhang outer->pivnul_list = inner->pivnul_list; \ 464cf053153SJunchao Zhang outer->mapping = inner->mapping; \ 465cf053153SJunchao Zhang } while (0) 466cf053153SJunchao Zhang 467cf053153SJunchao Zhang // Entry for PETSc to call mumps 468cf053153SJunchao Zhang static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer) 469cf053153SJunchao Zhang { 470cf053153SJunchao Zhang PetscFunctionBegin; 471cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 472cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 473cf053153SJunchao Zhang if (outer->precision == PETSC_PRECISION_SINGLE) { 474cf053153SJunchao Zhang CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id; 475cf053153SJunchao Zhang PreMumpsCall(inner, outer, CMUMPS_COMPLEX); 476cf053153SJunchao Zhang PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner)); 477cf053153SJunchao Zhang PostMumpsCall(inner, outer); 478cf053153SJunchao Zhang } else { 479cf053153SJunchao Zhang ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id; 480cf053153SJunchao Zhang PreMumpsCall(inner, outer, ZMUMPS_COMPLEX); 481cf053153SJunchao Zhang 
PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner)); 482cf053153SJunchao Zhang PostMumpsCall(inner, outer); 483cf053153SJunchao Zhang } 484cf053153SJunchao Zhang #else 485cf053153SJunchao Zhang if (outer->precision == PETSC_PRECISION_SINGLE) { 486cf053153SJunchao Zhang SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id; 487cf053153SJunchao Zhang PreMumpsCall(inner, outer, SMUMPS_REAL); 488cf053153SJunchao Zhang PetscStackCallExternalVoid("smumps_c", smumps_c(inner)); 489cf053153SJunchao Zhang PostMumpsCall(inner, outer); 490cf053153SJunchao Zhang } else { 491cf053153SJunchao Zhang DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id; 492cf053153SJunchao Zhang PreMumpsCall(inner, outer, DMUMPS_REAL); 493cf053153SJunchao Zhang PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner)); 494cf053153SJunchao Zhang PostMumpsCall(inner, outer); 495cf053153SJunchao Zhang } 496cf053153SJunchao Zhang #endif 497cf053153SJunchao Zhang #else 498cf053153SJunchao Zhang MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id; 499cf053153SJunchao Zhang PreMumpsCall(inner, outer, MumpsScalar); 500cf053153SJunchao Zhang PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner)); 501cf053153SJunchao Zhang PostMumpsCall(inner, outer); 502cf053153SJunchao Zhang #endif 503cf053153SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 504cf053153SJunchao Zhang } 505cf053153SJunchao Zhang 506cf053153SJunchao Zhang /* macros s.t. indices match MUMPS documentation */ 507cf053153SJunchao Zhang #define ICNTL(I) icntl[(I) - 1] 508cf053153SJunchao Zhang #define INFOG(I) infog[(I) - 1] 509cf053153SJunchao Zhang #define INFO(I) info[(I) - 1] 510cf053153SJunchao Zhang 511cf053153SJunchao Zhang // Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I! 
512cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 513cf053153SJunchao Zhang #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i) 514cf053153SJunchao Zhang #else 515cf053153SJunchao Zhang #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I]) 516cf053153SJunchao Zhang #endif 517cf053153SJunchao Zhang 518cf053153SJunchao Zhang // Get a value from MumpsReal arrays. The value is convertible to PetscReal. 519cf053153SJunchao Zhang #define ID_CNTL_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1]) 520cf053153SJunchao Zhang #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1]) 521cf053153SJunchao Zhang #define ID_RINFO_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1]) 522cf053153SJunchao Zhang 523cf053153SJunchao Zhang // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL> 524cf053153SJunchao Zhang #define ID_CNTL_SET(ID, I, VAL) \ 525cf053153SJunchao Zhang do { \ 526cf053153SJunchao Zhang if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \ 527cf053153SJunchao Zhang else ((double *)(ID).cntl)[(I) - 1] = (VAL); \ 528cf053153SJunchao Zhang } while (0) 529cf053153SJunchao Zhang 530217d3b1eSJunchao Zhang /* if using PETSc OpenMP support, we only call MUMPS on master ranks. 
Before/after the call, we change/restore CPUs the master ranks can run on */ 5313ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 5323ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 5333ab56b82SJunchao Zhang do { \ 5343ab56b82SJunchao Zhang if (mumps->use_petsc_omp_support) { \ 5353ab56b82SJunchao Zhang if (mumps->is_omp_master) { \ 5369566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 53714ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 538cf053153SJunchao Zhang PetscCall(PetscCallMumps_Private(&mumps->id)); \ 53914ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 5409566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 5413ab56b82SJunchao Zhang } \ 5429566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 543c3714a1dSJunchao Zhang /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 544c3714a1dSJunchao Zhang to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 545c3714a1dSJunchao Zhang an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 546c3714a1dSJunchao Zhang omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. 
\ 547c3714a1dSJunchao Zhang */ \ 548*d77aa1b8SPierre Jolivet MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \ 549cf053153SJunchao Zhang PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 550cf053153SJunchao Zhang PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 551cf053153SJunchao Zhang PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 552cf053153SJunchao Zhang PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 5533ab56b82SJunchao Zhang } else { \ 55414ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 555cf053153SJunchao Zhang PetscCall(PetscCallMumps_Private(&mumps->id)); \ 55614ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 5573ab56b82SJunchao Zhang } \ 5583ab56b82SJunchao Zhang } while (0) 5593ab56b82SJunchao Zhang #else 5603ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 561d71ae5a4SJacob Faibussowitsch do { \ 56214ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 563cf053153SJunchao Zhang PetscCall(PetscCallMumps_Private(&mumps->id)); \ 56414ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 565d71ae5a4SJacob Faibussowitsch } while (0) 5663ab56b82SJunchao Zhang #endif 5673ab56b82SJunchao Zhang 568a6053eceSJunchao Zhang typedef struct Mat_MUMPS Mat_MUMPS; 569a6053eceSJunchao Zhang struct Mat_MUMPS { 570cf053153SJunchao Zhang XMUMPS_STRUC_C id; 5712907cef9SHong Zhang 572397b6df1SKris Buschelman MatStructure matstruc; 5732d4298aeSJunchao Zhang PetscMPIInt myid, petsc_size; 574a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. 
*/ 575a6053eceSJunchao Zhang PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 5766497c311SBarry Smith PetscCount nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 577a6053eceSJunchao Zhang PetscMUMPSInt sym; 5782d4298aeSJunchao Zhang MPI_Comm mumps_comm; 579413bcc21SPierre Jolivet PetscMUMPSInt *ICNTL_pre; 580413bcc21SPierre Jolivet PetscReal *CNTL_pre; 581a6053eceSJunchao Zhang PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 582801fbe65SHong Zhang VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 58325aac85cSJunchao Zhang PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 584cf053153SJunchao Zhang PetscMUMPSInt ICNTL26; 58567602552SJunchao Zhang PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 58667602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 58767602552SJunchao Zhang PetscInt *rhs_nrow, max_nrhs; 58867602552SJunchao Zhang PetscMPIInt *rhs_recvcounts, *rhs_disps; 58967602552SJunchao Zhang PetscScalar *rhs_loc, *rhs_recvbuf; 59067602552SJunchao Zhang #endif 591801fbe65SHong Zhang Vec b_seq, x_seq; 592a6053eceSJunchao Zhang PetscInt ninfo, *info; /* which INFO to display */ 593b5fa320bSStefano Zampini PetscInt sizeredrhs; 59459ac8732SStefano Zampini PetscScalar *schur_sol; 59559ac8732SStefano Zampini PetscInt schur_sizesol; 596cf053153SJunchao Zhang PetscScalar *redrhs; // buffer in PetscScalar in case MumpsScalar is in a different precision 597a6053eceSJunchao Zhang PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 5986497c311SBarry Smith PetscCount cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 599a6053eceSJunchao Zhang PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 6002205254eSKarl Rupp 6019d0448ceSStefano Zampini /* Support for MATNEST */ 6029d0448ceSStefano 
Zampini PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 6036497c311SBarry Smith PetscCount *nest_vals_start; 6049d0448ceSStefano Zampini PetscScalar *nest_vals; 6059d0448ceSStefano Zampini 606a6053eceSJunchao Zhang /* stuff used by petsc/mumps OpenMP support*/ 6073ab56b82SJunchao Zhang PetscBool use_petsc_omp_support; 608da81f932SPierre Jolivet PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 609f0b74427SPierre Jolivet MPI_Comm petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */ 6106497c311SBarry Smith PetscCount *recvcount; /* a collection of nnz on omp_master */ 611a6053eceSJunchao Zhang PetscMPIInt tag, omp_comm_size; 6123ab56b82SJunchao Zhang PetscBool is_omp_master; /* is this rank the master of omp_comm */ 613a6053eceSJunchao Zhang MPI_Request *reqs; 614a6053eceSJunchao Zhang }; 6153ab56b82SJunchao Zhang 616a6053eceSJunchao Zhang /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 617a6053eceSJunchao Zhang Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 618a6053eceSJunchao Zhang */ 619d2a308c1SPierre Jolivet static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 620d71ae5a4SJacob Faibussowitsch { 6216497c311SBarry Smith PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */ 622f0c56d0fSKris Buschelman 623a6053eceSJunchao Zhang PetscFunctionBegin; 624a6053eceSJunchao Zhang #if defined(PETSC_USE_64BIT_INDICES) 625a6053eceSJunchao Zhang { 626a6053eceSJunchao Zhang PetscInt i; 627a6053eceSJunchao Zhang if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 6289566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 6299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 630a6053eceSJunchao Zhang mumps->cur_ilen = nrow + 1; 631a6053eceSJunchao Zhang } 632a6053eceSJunchao Zhang if (nnz > mumps->cur_jlen) { 6339566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 6349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 635a6053eceSJunchao Zhang mumps->cur_jlen = nnz; 636a6053eceSJunchao Zhang } 637f4f49eeaSPierre Jolivet for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i])); 638f4f49eeaSPierre Jolivet for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i])); 639a6053eceSJunchao Zhang *ia_mumps = mumps->ia_alloc; 640a6053eceSJunchao Zhang *ja_mumps = mumps->ja_alloc; 641a6053eceSJunchao Zhang } 642a6053eceSJunchao Zhang #else 643a6053eceSJunchao Zhang *ia_mumps = ia; 644a6053eceSJunchao Zhang *ja_mumps = ja; 645a6053eceSJunchao Zhang #endif 6469566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 6473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 648a6053eceSJunchao Zhang } 649b24902e0SBarry Smith 650d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 651d71ae5a4SJacob Faibussowitsch { 652b5fa320bSStefano Zampini PetscFunctionBegin; 6539566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.listvar_schur)); 654cf053153SJunchao Zhang PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in 
MatMumpsFreeInternalID() 6559566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 65659ac8732SStefano Zampini mumps->id.size_schur = 0; 657b3cb21ddSStefano Zampini mumps->id.schur_lld = 0; 658cf053153SJunchao Zhang if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is yet built 6593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 66059ac8732SStefano Zampini } 66159ac8732SStefano Zampini 662b3cb21ddSStefano Zampini /* solve with rhs in mumps->id.redrhs and return in the same location */ 663d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 664d71ae5a4SJacob Faibussowitsch { 665b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 666cf053153SJunchao Zhang Mat S, B, X; // solve S*X = B; all three matrices are dense 667b3cb21ddSStefano Zampini MatFactorSchurStatus schurstatus; 668b3cb21ddSStefano Zampini PetscInt sizesol; 669cf053153SJunchao Zhang const PetscScalar *xarray; 67059ac8732SStefano Zampini 67159ac8732SStefano Zampini PetscFunctionBegin; 6729566063dSJacob Faibussowitsch PetscCall(MatFactorFactorizeSchurComplement(F)); 6739566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 674cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs)); 675cf053153SJunchao Zhang 676cf053153SJunchao Zhang PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B)); 6779566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 678a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6799566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(B, S->boundtocpu)); 680a3d589ffSStefano Zampini #endif 681b3cb21ddSStefano Zampini switch (schurstatus) { 682d71ae5a4SJacob Faibussowitsch case MAT_FACTOR_SCHUR_FACTORED: 683cf053153SJunchao Zhang 
PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X)); 684d71ae5a4SJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 685a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6869566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, S->boundtocpu)); 687a3d589ffSStefano Zampini #endif 688b3cb21ddSStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 6899566063dSJacob Faibussowitsch PetscCall(MatMatSolveTranspose(S, B, X)); 69059ac8732SStefano Zampini } else { 6919566063dSJacob Faibussowitsch PetscCall(MatMatSolve(S, B, X)); 69259ac8732SStefano Zampini } 693b3cb21ddSStefano Zampini break; 694b3cb21ddSStefano Zampini case MAT_FACTOR_SCHUR_INVERTED: 695b3cb21ddSStefano Zampini sizesol = mumps->id.nrhs * mumps->id.size_schur; 69659ac8732SStefano Zampini if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 6979566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 6989566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 69959ac8732SStefano Zampini mumps->schur_sizesol = sizesol; 700b5fa320bSStefano Zampini } 7019566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 7029566063dSJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 703a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 7049566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, S->boundtocpu)); 705a3d589ffSStefano Zampini #endif 7069566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 70759ac8732SStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 7089566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 709b5fa320bSStefano Zampini } else { 7109566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 711b5fa320bSStefano 
Zampini } 7129566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(X)); 7139566063dSJacob Faibussowitsch PetscCall(MatProductSymbolic(X)); 7149566063dSJacob Faibussowitsch PetscCall(MatProductNumeric(X)); 7154417c5e8SHong Zhang 7169566063dSJacob Faibussowitsch PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 717b3cb21ddSStefano Zampini break; 718d71ae5a4SJacob Faibussowitsch default: 719d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 72059ac8732SStefano Zampini } 721cf053153SJunchao Zhang // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device. 722cf053153SJunchao Zhang PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs. 723cf053153SJunchao Zhang PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs)); 724cf053153SJunchao Zhang PetscCall(MatDenseRestoreArrayRead(X, &xarray)); 7259566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 7269566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 7279566063dSJacob Faibussowitsch PetscCall(MatDestroy(&X)); 7283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 729b5fa320bSStefano Zampini } 730b5fa320bSStefano Zampini 731d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 732d71ae5a4SJacob Faibussowitsch { 733b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 734b5fa320bSStefano Zampini 735b5fa320bSStefano Zampini PetscFunctionBegin; 736b5fa320bSStefano Zampini if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 7373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 738b5fa320bSStefano Zampini } 739b8f61ee1SStefano Zampini if (!expansion) { /* 
prepare for the condensation step */ 740b5fa320bSStefano Zampini PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 741b5fa320bSStefano Zampini /* allocate MUMPS internal array to store reduced right-hand sides */ 742b5fa320bSStefano Zampini if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 743b5fa320bSStefano Zampini mumps->id.lredrhs = mumps->id.size_schur; 744b5fa320bSStefano Zampini mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 745cf053153SJunchao Zhang if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); 746cf053153SJunchao Zhang PetscCall(PetscFree(mumps->redrhs)); 747cf053153SJunchao Zhang PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs)); 748cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs)); 749b5fa320bSStefano Zampini } 750b5fa320bSStefano Zampini } else { /* prepare for the expansion step */ 751cf053153SJunchao Zhang PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */ 752b5fa320bSStefano Zampini mumps->id.ICNTL(26) = 2; /* expansion phase */ 7533ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 754cf053153SJunchao Zhang PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 755b5fa320bSStefano Zampini /* restore defaults */ 756b5fa320bSStefano Zampini mumps->id.ICNTL(26) = -1; 757d3d598ffSStefano Zampini /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 758d3d598ffSStefano Zampini if (mumps->id.nrhs > 1) { 759cf053153SJunchao Zhang if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); 760cf053153SJunchao Zhang PetscCall(PetscFree(mumps->redrhs)); 761cf053153SJunchao Zhang mumps->id.redrhs_len = 0; 
762d3d598ffSStefano Zampini mumps->id.lredrhs = 0; 763d3d598ffSStefano Zampini mumps->sizeredrhs = 0; 764d3d598ffSStefano Zampini } 765b5fa320bSStefano Zampini } 7663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 767b5fa320bSStefano Zampini } 768b5fa320bSStefano Zampini 769397b6df1SKris Buschelman /* 770f0b74427SPierre Jolivet MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz] 771d341cd04SHong Zhang 772397b6df1SKris Buschelman input: 77375480915SPierre Jolivet A - matrix in aij,baij or sbaij format 774397b6df1SKris Buschelman shift - 0: C style output triple; 1: Fortran style output triple. 775bccb9932SShri Abhyankar reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 776bccb9932SShri Abhyankar MAT_REUSE_MATRIX: only the values in v array are updated 777397b6df1SKris Buschelman output: 778397b6df1SKris Buschelman nnz - dim of r, c, and v (number of local nonzero entries of A) 779397b6df1SKris Buschelman r, c, v - row and col index, matrix values (matrix triples) 780eb9baa12SBarry Smith 781eb9baa12SBarry Smith The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 7827ee00b23SStefano Zampini freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 783eb9baa12SBarry Smith that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
784eb9baa12SBarry Smith 785397b6df1SKris Buschelman */ 78616ebf90aSShri Abhyankar 78766976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 788d71ae5a4SJacob Faibussowitsch { 789a3d589ffSStefano Zampini const PetscScalar *av; 790185f6596SHong Zhang const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 7916497c311SBarry Smith PetscCount nz, rnz, k; 792a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 79316ebf90aSShri Abhyankar Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 794397b6df1SKris Buschelman 795397b6df1SKris Buschelman PetscFunctionBegin; 7969566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 797bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7982205254eSKarl Rupp nz = aa->nz; 7992205254eSKarl Rupp ai = aa->i; 8002205254eSKarl Rupp aj = aa->j; 8019566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 8026497c311SBarry Smith for (PetscCount i = k = 0; i < M; i++) { 80316ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 80467877ebaSShri Abhyankar ajj = aj + ai[i]; 8056497c311SBarry Smith for (PetscCount j = 0; j < rnz; j++) { 8069566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 8079566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 808a6053eceSJunchao Zhang k++; 80916ebf90aSShri Abhyankar } 81016ebf90aSShri Abhyankar } 81150c845baSStefano Zampini mumps->val = (PetscScalar *)av; 812a6053eceSJunchao Zhang mumps->irn = row; 813a6053eceSJunchao Zhang mumps->jcn = col; 814a6053eceSJunchao Zhang mumps->nnz = nz; 815127cd276SPierre Jolivet } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */ 816127cd276SPierre Jolivet else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs 
to update the mumps->val pointer */ 8179566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 8183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 81916ebf90aSShri Abhyankar } 820397b6df1SKris Buschelman 82166976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 822d71ae5a4SJacob Faibussowitsch { 8236497c311SBarry Smith PetscCount nz, i, j, k, r; 8247ee00b23SStefano Zampini Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 825a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 8267ee00b23SStefano Zampini 8277ee00b23SStefano Zampini PetscFunctionBegin; 8287ee00b23SStefano Zampini nz = a->sliidx[a->totalslices]; 82950c845baSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 8309566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 831a6053eceSJunchao Zhang for (i = k = 0; i < a->totalslices; i++) { 83248a46eb9SPierre Jolivet for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 8337ee00b23SStefano Zampini } 8349566063dSJacob Faibussowitsch for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 835a6053eceSJunchao Zhang mumps->irn = row; 836a6053eceSJunchao Zhang mumps->jcn = col; 837a6053eceSJunchao Zhang mumps->nnz = nz; 83850c845baSStefano Zampini mumps->val = a->val; 839127cd276SPierre Jolivet } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */ 840127cd276SPierre Jolivet else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 8413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8427ee00b23SStefano Zampini } 8437ee00b23SStefano Zampini 84466976f2fSJacob 
Faibussowitsch static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 845d71ae5a4SJacob Faibussowitsch { 84667877ebaSShri Abhyankar Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 84733d57670SJed Brown const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 8486497c311SBarry Smith PetscCount M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 849a6053eceSJunchao Zhang PetscInt bs; 850a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 85167877ebaSShri Abhyankar 85267877ebaSShri Abhyankar PetscFunctionBegin; 85350c845baSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 8549566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 85533d57670SJed Brown M = A->rmap->N / bs; 8569371c9d4SSatish Balay ai = aa->i; 8579371c9d4SSatish Balay aj = aa->j; 8589566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 85967877ebaSShri Abhyankar for (i = 0; i < M; i++) { 86067877ebaSShri Abhyankar ajj = aj + ai[i]; 86167877ebaSShri Abhyankar rnz = ai[i + 1] - ai[i]; 86267877ebaSShri Abhyankar for (k = 0; k < rnz; k++) { 86367877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 86467877ebaSShri Abhyankar for (m = 0; m < bs; m++) { 8659566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 8669566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 867a6053eceSJunchao Zhang idx++; 86867877ebaSShri Abhyankar } 86967877ebaSShri Abhyankar } 87067877ebaSShri Abhyankar } 87167877ebaSShri Abhyankar } 872a6053eceSJunchao Zhang mumps->irn = row; 873a6053eceSJunchao Zhang mumps->jcn = col; 874a6053eceSJunchao Zhang mumps->nnz = nz; 87550c845baSStefano Zampini mumps->val = aa->a; 876127cd276SPierre Jolivet } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 877127cd276SPierre Jolivet else mumps->val = 
aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 8783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 87967877ebaSShri Abhyankar } 88067877ebaSShri Abhyankar 88166976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 882d71ae5a4SJacob Faibussowitsch { 88375480915SPierre Jolivet const PetscInt *ai, *aj, *ajj; 884a6053eceSJunchao Zhang PetscInt bs; 8856497c311SBarry Smith PetscCount nz, rnz, i, j, k, m; 886a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 88775480915SPierre Jolivet PetscScalar *val; 88816ebf90aSShri Abhyankar Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 88975480915SPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 89038548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 891b94d7dedSBarry Smith PetscBool isset, hermitian; 89238548759SBarry Smith #endif 89316ebf90aSShri Abhyankar 89416ebf90aSShri Abhyankar PetscFunctionBegin; 89538548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 896b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 897b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 89838548759SBarry Smith #endif 8992205254eSKarl Rupp ai = aa->i; 9002205254eSKarl Rupp aj = aa->j; 9019566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 90275480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 9036497c311SBarry Smith const PetscCount alloc_size = aa->nz * bs2; 904f3fa974cSJacob Faibussowitsch 905f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 906a6053eceSJunchao Zhang if (bs > 1) { 907f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 908a6053eceSJunchao Zhang mumps->val = mumps->val_alloc; 90975480915SPierre Jolivet } else { 
910a6053eceSJunchao Zhang mumps->val = aa->a; 91175480915SPierre Jolivet } 912a6053eceSJunchao Zhang mumps->irn = row; 913a6053eceSJunchao Zhang mumps->jcn = col; 914a6053eceSJunchao Zhang } else { 915a6053eceSJunchao Zhang row = mumps->irn; 916a6053eceSJunchao Zhang col = mumps->jcn; 917a6053eceSJunchao Zhang } 918a6053eceSJunchao Zhang val = mumps->val; 919185f6596SHong Zhang 92016ebf90aSShri Abhyankar nz = 0; 921a81fe166SPierre Jolivet if (bs > 1) { 92275480915SPierre Jolivet for (i = 0; i < mbs; i++) { 92316ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 92467877ebaSShri Abhyankar ajj = aj + ai[i]; 92575480915SPierre Jolivet for (j = 0; j < rnz; j++) { 92675480915SPierre Jolivet for (k = 0; k < bs; k++) { 92775480915SPierre Jolivet for (m = 0; m < bs; m++) { 928ec4f40fdSPierre Jolivet if (ajj[j] > i || k >= m) { 92975480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 9309566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 9319566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 93275480915SPierre Jolivet } 93375480915SPierre Jolivet val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 93475480915SPierre Jolivet } 93575480915SPierre Jolivet } 93675480915SPierre Jolivet } 93775480915SPierre Jolivet } 93875480915SPierre Jolivet } 939a81fe166SPierre Jolivet } else if (reuse == MAT_INITIAL_MATRIX) { 940a81fe166SPierre Jolivet for (i = 0; i < mbs; i++) { 941a81fe166SPierre Jolivet rnz = ai[i + 1] - ai[i]; 942a81fe166SPierre Jolivet ajj = aj + ai[i]; 943a81fe166SPierre Jolivet for (j = 0; j < rnz; j++) { 9449566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 9459566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 946a6053eceSJunchao Zhang nz++; 947a81fe166SPierre Jolivet } 948a81fe166SPierre Jolivet } 9496497c311SBarry Smith PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" 
PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz); 950127cd276SPierre Jolivet } else if (mumps->nest_vals) 951127cd276SPierre Jolivet PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 952127cd276SPierre Jolivet else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 953a6053eceSJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 9543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 95516ebf90aSShri Abhyankar } 95616ebf90aSShri Abhyankar 95766976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 958d71ae5a4SJacob Faibussowitsch { 95967877ebaSShri Abhyankar const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 9606497c311SBarry Smith PetscCount nz, rnz, i, j; 96167877ebaSShri Abhyankar const PetscScalar *av, *v1; 96216ebf90aSShri Abhyankar PetscScalar *val; 963a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 964829b1710SHong Zhang Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 965421480d9SBarry Smith PetscBool diagDense; 96638548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 967b94d7dedSBarry Smith PetscBool hermitian, isset; 96838548759SBarry Smith #endif 96916ebf90aSShri Abhyankar 97016ebf90aSShri Abhyankar PetscFunctionBegin; 97138548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 972b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 973b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 97438548759SBarry Smith #endif 9759566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 9769371c9d4SSatish Balay ai = aa->i; 9779371c9d4SSatish Balay aj = aa->j; 
978421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense)); 979bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 9807ee00b23SStefano Zampini /* count nz in the upper triangular part of A */ 981829b1710SHong Zhang nz = 0; 982421480d9SBarry Smith if (!diagDense) { 98329b521d4Sstefano_zampini for (i = 0; i < M; i++) { 98429b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 98529b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 98629b521d4Sstefano_zampini if (aj[j] < i) continue; 98729b521d4Sstefano_zampini nz++; 98829b521d4Sstefano_zampini } 98929b521d4Sstefano_zampini } else { 99029b521d4Sstefano_zampini nz += ai[i + 1] - adiag[i]; 99129b521d4Sstefano_zampini } 99229b521d4Sstefano_zampini } 99329b521d4Sstefano_zampini } else { 994829b1710SHong Zhang for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 99529b521d4Sstefano_zampini } 9969566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 9979566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 998a6053eceSJunchao Zhang mumps->nnz = nz; 999a6053eceSJunchao Zhang mumps->irn = row; 1000a6053eceSJunchao Zhang mumps->jcn = col; 1001a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 1002185f6596SHong Zhang 100316ebf90aSShri Abhyankar nz = 0; 1004421480d9SBarry Smith if (!diagDense) { 100529b521d4Sstefano_zampini for (i = 0; i < M; i++) { 100629b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 100729b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 100829b521d4Sstefano_zampini if (aj[j] < i) continue; 10099566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 10109566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 101129b521d4Sstefano_zampini val[nz] = av[j]; 101229b521d4Sstefano_zampini nz++; 101329b521d4Sstefano_zampini } 101429b521d4Sstefano_zampini } else { 101529b521d4Sstefano_zampini rnz = ai[i + 1] - adiag[i]; 
101629b521d4Sstefano_zampini ajj = aj + adiag[i]; 101729b521d4Sstefano_zampini v1 = av + adiag[i]; 101829b521d4Sstefano_zampini for (j = 0; j < rnz; j++) { 10199566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 10209566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1021a6053eceSJunchao Zhang val[nz++] = v1[j]; 102229b521d4Sstefano_zampini } 102329b521d4Sstefano_zampini } 102429b521d4Sstefano_zampini } 102529b521d4Sstefano_zampini } else { 102616ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 102716ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 102867877ebaSShri Abhyankar ajj = aj + adiag[i]; 1029cf3759fdSShri Abhyankar v1 = av + adiag[i]; 103067877ebaSShri Abhyankar for (j = 0; j < rnz; j++) { 10319566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 10329566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1033a6053eceSJunchao Zhang val[nz++] = v1[j]; 103416ebf90aSShri Abhyankar } 103516ebf90aSShri Abhyankar } 103629b521d4Sstefano_zampini } 1037397b6df1SKris Buschelman } else { 1038a6053eceSJunchao Zhang nz = 0; 1039a6053eceSJunchao Zhang val = mumps->val; 1040421480d9SBarry Smith if (!diagDense) { 104116ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 104229b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 104329b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 104429b521d4Sstefano_zampini if (aj[j] < i) continue; 104529b521d4Sstefano_zampini val[nz++] = av[j]; 104629b521d4Sstefano_zampini } 104729b521d4Sstefano_zampini } else { 104816ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 104967877ebaSShri Abhyankar v1 = av + adiag[i]; 1050ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 105116ebf90aSShri Abhyankar } 105216ebf90aSShri Abhyankar } 105329b521d4Sstefano_zampini } else { 105416ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 105516ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 
105616ebf90aSShri Abhyankar v1 = av + adiag[i]; 1057ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 105816ebf90aSShri Abhyankar } 105916ebf90aSShri Abhyankar } 106029b521d4Sstefano_zampini } 10619566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 10623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 106316ebf90aSShri Abhyankar } 106416ebf90aSShri Abhyankar 106566976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1066d71ae5a4SJacob Faibussowitsch { 1067a6053eceSJunchao Zhang const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 1068a6053eceSJunchao Zhang PetscInt bs; 10696497c311SBarry Smith PetscCount rstart, nz, i, j, k, m, jj, irow, countA, countB; 1070a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 107116ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 107216ebf90aSShri Abhyankar PetscScalar *val; 1073397b6df1SKris Buschelman Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 1074f4f49eeaSPierre Jolivet Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data; 1075f4f49eeaSPierre Jolivet Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 1076ec4f40fdSPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 107738548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 1078b94d7dedSBarry Smith PetscBool hermitian, isset; 107938548759SBarry Smith #endif 108016ebf90aSShri Abhyankar 108116ebf90aSShri Abhyankar PetscFunctionBegin; 108238548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 1083b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1084b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 108538548759SBarry Smith #endif 10869566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 108738548759SBarry Smith rstart = A->rmap->rstart; 108838548759SBarry Smith ai = aa->i; 
108938548759SBarry Smith aj = aa->j; 109038548759SBarry Smith bi = bb->i; 109138548759SBarry Smith bj = bb->j; 109238548759SBarry Smith av = aa->a; 109338548759SBarry Smith bv = bb->a; 1094397b6df1SKris Buschelman 10952205254eSKarl Rupp garray = mat->garray; 10962205254eSKarl Rupp 1097bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 1098a6053eceSJunchao Zhang nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 10999566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 11009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 1101a6053eceSJunchao Zhang /* can not decide the exact mumps->nnz now because of the SBAIJ */ 1102a6053eceSJunchao Zhang mumps->irn = row; 1103a6053eceSJunchao Zhang mumps->jcn = col; 1104a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 1105397b6df1SKris Buschelman } else { 1106a6053eceSJunchao Zhang val = mumps->val; 1107397b6df1SKris Buschelman } 1108397b6df1SKris Buschelman 11099371c9d4SSatish Balay jj = 0; 11109371c9d4SSatish Balay irow = rstart; 1111ec4f40fdSPierre Jolivet for (i = 0; i < mbs; i++) { 1112397b6df1SKris Buschelman ajj = aj + ai[i]; /* ptr to the beginning of this row */ 1113397b6df1SKris Buschelman countA = ai[i + 1] - ai[i]; 1114397b6df1SKris Buschelman countB = bi[i + 1] - bi[i]; 1115397b6df1SKris Buschelman bjj = bj + bi[i]; 1116ec4f40fdSPierre Jolivet v1 = av + ai[i] * bs2; 1117ec4f40fdSPierre Jolivet v2 = bv + bi[i] * bs2; 1118397b6df1SKris Buschelman 1119ec4f40fdSPierre Jolivet if (bs > 1) { 1120ec4f40fdSPierre Jolivet /* A-part */ 1121ec4f40fdSPierre Jolivet for (j = 0; j < countA; j++) { 1122ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 1123ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 1124ec4f40fdSPierre Jolivet if (rstart + ajj[j] * bs > irow || k >= m) { 1125ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 11269566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 11279566063dSJacob Faibussowitsch 
PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 1128ec4f40fdSPierre Jolivet } 1129ec4f40fdSPierre Jolivet val[jj++] = v1[j * bs2 + m + k * bs]; 1130ec4f40fdSPierre Jolivet } 1131ec4f40fdSPierre Jolivet } 1132ec4f40fdSPierre Jolivet } 1133ec4f40fdSPierre Jolivet } 1134ec4f40fdSPierre Jolivet 1135ec4f40fdSPierre Jolivet /* B-part */ 1136ec4f40fdSPierre Jolivet for (j = 0; j < countB; j++) { 1137ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 1138ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 1139ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 11409566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 11419566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 1142ec4f40fdSPierre Jolivet } 1143ec4f40fdSPierre Jolivet val[jj++] = v2[j * bs2 + m + k * bs]; 1144ec4f40fdSPierre Jolivet } 1145ec4f40fdSPierre Jolivet } 1146ec4f40fdSPierre Jolivet } 1147ec4f40fdSPierre Jolivet } else { 1148397b6df1SKris Buschelman /* A-part */ 1149397b6df1SKris Buschelman for (j = 0; j < countA; j++) { 1150bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 11519566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 11529566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 1153397b6df1SKris Buschelman } 115416ebf90aSShri Abhyankar val[jj++] = v1[j]; 1155397b6df1SKris Buschelman } 115616ebf90aSShri Abhyankar 115716ebf90aSShri Abhyankar /* B-part */ 115816ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 1159bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 11609566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 11619566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1162397b6df1SKris Buschelman } 116316ebf90aSShri Abhyankar val[jj++] = v2[j]; 116416ebf90aSShri Abhyankar } 116516ebf90aSShri Abhyankar } 
1166ec4f40fdSPierre Jolivet irow += bs; 1167ec4f40fdSPierre Jolivet } 11685d955bbbSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 11693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 117016ebf90aSShri Abhyankar } 117116ebf90aSShri Abhyankar 117266976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1173d71ae5a4SJacob Faibussowitsch { 117416ebf90aSShri Abhyankar const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 11756497c311SBarry Smith PetscCount rstart, cstart, nz, i, j, jj, irow, countA, countB; 1176a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 117716ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 117816ebf90aSShri Abhyankar PetscScalar *val; 1179a3d589ffSStefano Zampini Mat Ad, Ao; 1180a3d589ffSStefano Zampini Mat_SeqAIJ *aa; 1181a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 118216ebf90aSShri Abhyankar 118316ebf90aSShri Abhyankar PetscFunctionBegin; 11849566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 11859566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 11869566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1187a3d589ffSStefano Zampini 118857508eceSPierre Jolivet aa = (Mat_SeqAIJ *)Ad->data; 118957508eceSPierre Jolivet bb = (Mat_SeqAIJ *)Ao->data; 119038548759SBarry Smith ai = aa->i; 119138548759SBarry Smith aj = aa->j; 119238548759SBarry Smith bi = bb->i; 119338548759SBarry Smith bj = bb->j; 119416ebf90aSShri Abhyankar 1195a3d589ffSStefano Zampini rstart = A->rmap->rstart; 11965d955bbbSStefano Zampini cstart = A->cmap->rstart; 11972205254eSKarl Rupp 1198bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 11996497c311SBarry Smith nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 12009566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 12019566063dSJacob Faibussowitsch 
PetscCall(PetscMalloc1(nz, &val)); 1202a6053eceSJunchao Zhang mumps->nnz = nz; 1203a6053eceSJunchao Zhang mumps->irn = row; 1204a6053eceSJunchao Zhang mumps->jcn = col; 1205a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 120616ebf90aSShri Abhyankar } else { 1207a6053eceSJunchao Zhang val = mumps->val; 120816ebf90aSShri Abhyankar } 120916ebf90aSShri Abhyankar 12109371c9d4SSatish Balay jj = 0; 12119371c9d4SSatish Balay irow = rstart; 121216ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 121316ebf90aSShri Abhyankar ajj = aj + ai[i]; /* ptr to the beginning of this row */ 121416ebf90aSShri Abhyankar countA = ai[i + 1] - ai[i]; 121516ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 121616ebf90aSShri Abhyankar bjj = bj + bi[i]; 121716ebf90aSShri Abhyankar v1 = av + ai[i]; 121816ebf90aSShri Abhyankar v2 = bv + bi[i]; 121916ebf90aSShri Abhyankar 122016ebf90aSShri Abhyankar /* A-part */ 122116ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 1222bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 12239566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 12245d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 122516ebf90aSShri Abhyankar } 122616ebf90aSShri Abhyankar val[jj++] = v1[j]; 122716ebf90aSShri Abhyankar } 122816ebf90aSShri Abhyankar 122916ebf90aSShri Abhyankar /* B-part */ 123016ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 1231bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 12329566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 12339566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 123416ebf90aSShri Abhyankar } 123516ebf90aSShri Abhyankar val[jj++] = v2[j]; 123616ebf90aSShri Abhyankar } 123716ebf90aSShri Abhyankar irow++; 123816ebf90aSShri Abhyankar } 12399566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 12409566063dSJacob Faibussowitsch 
PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 12413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 124216ebf90aSShri Abhyankar } 124316ebf90aSShri Abhyankar 124466976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1245d71ae5a4SJacob Faibussowitsch { 124667877ebaSShri Abhyankar Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 1247f4f49eeaSPierre Jolivet Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)mat->A->data; 1248f4f49eeaSPierre Jolivet Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 124967877ebaSShri Abhyankar const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 12505d955bbbSStefano Zampini const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 125133d57670SJed Brown const PetscInt bs2 = mat->bs2; 1252a6053eceSJunchao Zhang PetscInt bs; 12536497c311SBarry Smith PetscCount nz, i, j, k, n, jj, irow, countA, countB, idx; 1254a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 125567877ebaSShri Abhyankar const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 125667877ebaSShri Abhyankar PetscScalar *val; 125767877ebaSShri Abhyankar 125867877ebaSShri Abhyankar PetscFunctionBegin; 12599566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 1260bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 126167877ebaSShri Abhyankar nz = bs2 * (aa->nz + bb->nz); 12629566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 12639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 1264a6053eceSJunchao Zhang mumps->nnz = nz; 1265a6053eceSJunchao Zhang mumps->irn = row; 1266a6053eceSJunchao Zhang mumps->jcn = col; 1267a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 126867877ebaSShri Abhyankar } else { 1269a6053eceSJunchao Zhang val = mumps->val; 127067877ebaSShri Abhyankar } 127167877ebaSShri Abhyankar 12729371c9d4SSatish Balay jj = 0; 12739371c9d4SSatish Balay irow = 
rstart; 127467877ebaSShri Abhyankar for (i = 0; i < mbs; i++) { 127567877ebaSShri Abhyankar countA = ai[i + 1] - ai[i]; 127667877ebaSShri Abhyankar countB = bi[i + 1] - bi[i]; 127767877ebaSShri Abhyankar ajj = aj + ai[i]; 127867877ebaSShri Abhyankar bjj = bj + bi[i]; 127967877ebaSShri Abhyankar v1 = av + bs2 * ai[i]; 128067877ebaSShri Abhyankar v2 = bv + bs2 * bi[i]; 128167877ebaSShri Abhyankar 128267877ebaSShri Abhyankar idx = 0; 128367877ebaSShri Abhyankar /* A-part */ 128467877ebaSShri Abhyankar for (k = 0; k < countA; k++) { 128567877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 128667877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 1287bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 12889566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 12895d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 129067877ebaSShri Abhyankar } 129167877ebaSShri Abhyankar val[jj++] = v1[idx++]; 129267877ebaSShri Abhyankar } 129367877ebaSShri Abhyankar } 129467877ebaSShri Abhyankar } 129567877ebaSShri Abhyankar 129667877ebaSShri Abhyankar idx = 0; 129767877ebaSShri Abhyankar /* B-part */ 129867877ebaSShri Abhyankar for (k = 0; k < countB; k++) { 129967877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 130067877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 1301bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 13029566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 13039566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 130467877ebaSShri Abhyankar } 1305d985c460SShri Abhyankar val[jj++] = v2[idx++]; 130667877ebaSShri Abhyankar } 130767877ebaSShri Abhyankar } 130867877ebaSShri Abhyankar } 1309d985c460SShri Abhyankar irow += bs; 131067877ebaSShri Abhyankar } 13113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 131267877ebaSShri Abhyankar } 131367877ebaSShri Abhyankar 131466976f2fSJacob 
Faibussowitsch static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1315d71ae5a4SJacob Faibussowitsch { 131616ebf90aSShri Abhyankar const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 13176497c311SBarry Smith PetscCount rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 1318a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 131916ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 132016ebf90aSShri Abhyankar PetscScalar *val; 1321a3d589ffSStefano Zampini Mat Ad, Ao; 1322a3d589ffSStefano Zampini Mat_SeqAIJ *aa; 1323a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 132438548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 1325b94d7dedSBarry Smith PetscBool hermitian, isset; 132638548759SBarry Smith #endif 132716ebf90aSShri Abhyankar 132816ebf90aSShri Abhyankar PetscFunctionBegin; 132938548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 1330b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1331b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 133238548759SBarry Smith #endif 13339566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 13349566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 13359566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1336a3d589ffSStefano Zampini 133757508eceSPierre Jolivet aa = (Mat_SeqAIJ *)Ad->data; 133857508eceSPierre Jolivet bb = (Mat_SeqAIJ *)Ao->data; 133938548759SBarry Smith ai = aa->i; 134038548759SBarry Smith aj = aa->j; 134138548759SBarry Smith bi = bb->i; 134238548759SBarry Smith bj = bb->j; 1343421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL)); 134416ebf90aSShri Abhyankar rstart = A->rmap->rstart; 134516ebf90aSShri Abhyankar 1346bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 1347e0bace9bSHong 
Zhang nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 1348e0bace9bSHong Zhang nzb = 0; /* num of upper triangular entries in mat->B */ 134916ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 1350e0bace9bSHong Zhang nza += (ai[i + 1] - adiag[i]); 135116ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 135216ebf90aSShri Abhyankar bjj = bj + bi[i]; 1353e0bace9bSHong Zhang for (j = 0; j < countB; j++) { 1354e0bace9bSHong Zhang if (garray[bjj[j]] > rstart) nzb++; 1355e0bace9bSHong Zhang } 1356e0bace9bSHong Zhang } 135716ebf90aSShri Abhyankar 1358e0bace9bSHong Zhang nz = nza + nzb; /* total nz of upper triangular part of mat */ 13599566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 13609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 1361a6053eceSJunchao Zhang mumps->nnz = nz; 1362a6053eceSJunchao Zhang mumps->irn = row; 1363a6053eceSJunchao Zhang mumps->jcn = col; 1364a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 136516ebf90aSShri Abhyankar } else { 1366a6053eceSJunchao Zhang val = mumps->val; 136716ebf90aSShri Abhyankar } 136816ebf90aSShri Abhyankar 13699371c9d4SSatish Balay jj = 0; 13709371c9d4SSatish Balay irow = rstart; 137116ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 137216ebf90aSShri Abhyankar ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 137316ebf90aSShri Abhyankar v1 = av + adiag[i]; 137416ebf90aSShri Abhyankar countA = ai[i + 1] - adiag[i]; 137516ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 137616ebf90aSShri Abhyankar bjj = bj + bi[i]; 137716ebf90aSShri Abhyankar v2 = bv + bi[i]; 137816ebf90aSShri Abhyankar 137916ebf90aSShri Abhyankar /* A-part */ 138016ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 1381bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 13829566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 13839566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, 
&col[jj])); 138416ebf90aSShri Abhyankar } 138516ebf90aSShri Abhyankar val[jj++] = v1[j]; 138616ebf90aSShri Abhyankar } 138716ebf90aSShri Abhyankar 138816ebf90aSShri Abhyankar /* B-part */ 138916ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 139016ebf90aSShri Abhyankar if (garray[bjj[j]] > rstart) { 1391bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 13929566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 13939566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 139416ebf90aSShri Abhyankar } 139516ebf90aSShri Abhyankar val[jj++] = v2[j]; 139616ebf90aSShri Abhyankar } 1397397b6df1SKris Buschelman } 1398397b6df1SKris Buschelman irow++; 1399397b6df1SKris Buschelman } 14009566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 14019566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 14023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1403397b6df1SKris Buschelman } 1404397b6df1SKris Buschelman 1405d2a308c1SPierre Jolivet static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1406c3e1b152SPierre Jolivet { 1407c3e1b152SPierre Jolivet const PetscScalar *av; 1408c3e1b152SPierre Jolivet const PetscInt M = A->rmap->n; 14096497c311SBarry Smith PetscCount i; 1410c3e1b152SPierre Jolivet PetscMUMPSInt *row, *col; 1411c3e1b152SPierre Jolivet Vec v; 1412c3e1b152SPierre Jolivet 1413c3e1b152SPierre Jolivet PetscFunctionBegin; 1414c3e1b152SPierre Jolivet PetscCall(MatDiagonalGetDiagonal(A, &v)); 1415c3e1b152SPierre Jolivet PetscCall(VecGetArrayRead(v, &av)); 1416c3e1b152SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 1417c3e1b152SPierre Jolivet PetscCall(PetscMalloc2(M, &row, M, &col)); 1418c3e1b152SPierre Jolivet for (i = 0; i < M; i++) { 1419c3e1b152SPierre Jolivet PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1420c3e1b152SPierre Jolivet col[i] = 
row[i]; 1421c3e1b152SPierre Jolivet } 1422c3e1b152SPierre Jolivet mumps->val = (PetscScalar *)av; 1423c3e1b152SPierre Jolivet mumps->irn = row; 1424c3e1b152SPierre Jolivet mumps->jcn = col; 1425c3e1b152SPierre Jolivet mumps->nnz = M; 1426127cd276SPierre Jolivet } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1427127cd276SPierre Jolivet else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1428c3e1b152SPierre Jolivet PetscCall(VecRestoreArrayRead(v, &av)); 1429c3e1b152SPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 1430c3e1b152SPierre Jolivet } 1431c3e1b152SPierre Jolivet 1432d2a308c1SPierre Jolivet static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 14334b9405b2SPierre Jolivet { 14344b9405b2SPierre Jolivet PetscScalar *v; 14354b9405b2SPierre Jolivet const PetscInt m = A->rmap->n, N = A->cmap->N; 14364b9405b2SPierre Jolivet PetscInt lda; 14376497c311SBarry Smith PetscCount i, j; 14384b9405b2SPierre Jolivet PetscMUMPSInt *row, *col; 14394b9405b2SPierre Jolivet 14404b9405b2SPierre Jolivet PetscFunctionBegin; 14414b9405b2SPierre Jolivet PetscCall(MatDenseGetArray(A, &v)); 14424b9405b2SPierre Jolivet PetscCall(MatDenseGetLDA(A, &lda)); 14434b9405b2SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 14444b9405b2SPierre Jolivet PetscCall(PetscMalloc2(m * N, &row, m * N, &col)); 14454b9405b2SPierre Jolivet for (i = 0; i < m; i++) { 14464b9405b2SPierre Jolivet col[i] = 0; 14474b9405b2SPierre Jolivet PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 14484b9405b2SPierre Jolivet } 14494b9405b2SPierre Jolivet for (j = 1; j < N; j++) { 14504b9405b2SPierre Jolivet for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j)); 
14514b9405b2SPierre Jolivet PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m)); 14524b9405b2SPierre Jolivet } 14534b9405b2SPierre Jolivet if (lda == m) mumps->val = v; 14544b9405b2SPierre Jolivet else { 14554b9405b2SPierre Jolivet PetscCall(PetscMalloc1(m * N, &mumps->val)); 14564b9405b2SPierre Jolivet mumps->val_alloc = mumps->val; 14574b9405b2SPierre Jolivet for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 14584b9405b2SPierre Jolivet } 14594b9405b2SPierre Jolivet mumps->irn = row; 14604b9405b2SPierre Jolivet mumps->jcn = col; 14614b9405b2SPierre Jolivet mumps->nnz = m * N; 14624b9405b2SPierre Jolivet } else { 14634b9405b2SPierre Jolivet if (lda == m && !mumps->nest_vals) mumps->val = v; 14644b9405b2SPierre Jolivet else { 14654b9405b2SPierre Jolivet for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 14664b9405b2SPierre Jolivet } 14674b9405b2SPierre Jolivet } 14684b9405b2SPierre Jolivet PetscCall(MatDenseRestoreArray(A, &v)); 14694b9405b2SPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 14704b9405b2SPierre Jolivet } 14714b9405b2SPierre Jolivet 147253587d93SPierre Jolivet // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a 147353587d93SPierre Jolivet // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated 147453587d93SPierre Jolivet // and its rows and columns permuted 147553587d93SPierre Jolivet // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest() 147653587d93SPierre Jolivet static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap) 147753587d93SPierre Jolivet { 147853587d93SPierre Jolivet Mat A; 147953587d93SPierre Jolivet 
PetscScalar s[2]; 148053587d93SPierre Jolivet PetscBool isTrans, isHTrans, compare; 148153587d93SPierre Jolivet 148253587d93SPierre Jolivet PetscFunctionBegin; 148353587d93SPierre Jolivet do { 148453587d93SPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans)); 148553587d93SPierre Jolivet if (isTrans) { 148653587d93SPierre Jolivet PetscCall(MatTransposeGetMat(*sub, &A)); 148753587d93SPierre Jolivet isHTrans = PETSC_FALSE; 148853587d93SPierre Jolivet } else { 148953587d93SPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 149053587d93SPierre Jolivet if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A)); 149153587d93SPierre Jolivet } 149253587d93SPierre Jolivet compare = (PetscBool)(isTrans || isHTrans); 149353587d93SPierre Jolivet if (compare) { 149453587d93SPierre Jolivet if (vshift && vscale) { 149553587d93SPierre Jolivet PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 149653587d93SPierre Jolivet if (!*conjugate) { 149753587d93SPierre Jolivet *vshift += s[0] * *vscale; 149853587d93SPierre Jolivet *vscale *= s[1]; 149953587d93SPierre Jolivet } else { 150053587d93SPierre Jolivet *vshift += PetscConj(s[0]) * *vscale; 150153587d93SPierre Jolivet *vscale *= PetscConj(s[1]); 150253587d93SPierre Jolivet } 150353587d93SPierre Jolivet } 150453587d93SPierre Jolivet if (swap) *swap = (PetscBool)!*swap; 150553587d93SPierre Jolivet if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate; 150653587d93SPierre Jolivet *sub = A; 150753587d93SPierre Jolivet } 150853587d93SPierre Jolivet } while (compare); 150953587d93SPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 151053587d93SPierre Jolivet } 151153587d93SPierre Jolivet 151266976f2fSJacob Faibussowitsch static PetscErrorCode 
MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 15139d0448ceSStefano Zampini { 15149d0448ceSStefano Zampini Mat **mats; 15159d0448ceSStefano Zampini PetscInt nr, nc; 15169d0448ceSStefano Zampini PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 15179d0448ceSStefano Zampini 15189d0448ceSStefano Zampini PetscFunctionBegin; 15199d0448ceSStefano Zampini PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 15209d0448ceSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 15219d0448ceSStefano Zampini PetscMUMPSInt *irns, *jcns; 15229d0448ceSStefano Zampini PetscScalar *vals; 15236497c311SBarry Smith PetscCount totnnz, cumnnz, maxnnz; 152493d70b8aSPierre Jolivet PetscInt *pjcns_w, Mbs = 0; 15259d0448ceSStefano Zampini IS *rows, *cols; 15269d0448ceSStefano Zampini PetscInt **rows_idx, **cols_idx; 15279d0448ceSStefano Zampini 15289d0448ceSStefano Zampini cumnnz = 0; 15299d0448ceSStefano Zampini maxnnz = 0; 15305d955bbbSStefano Zampini PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 15319d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 15329d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 15339d0448ceSStefano Zampini Mat sub = mats[r][c]; 15349d0448ceSStefano Zampini 15359d0448ceSStefano Zampini mumps->nest_convert_to_triples[r * nc + c] = NULL; 15369d0448ceSStefano Zampini if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 15379d0448ceSStefano Zampini if (sub) { 15389d0448ceSStefano Zampini PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 153953587d93SPierre Jolivet PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 15409d0448ceSStefano Zampini MatInfo info; 15419d0448ceSStefano Zampini 154253587d93SPierre Jolivet PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 15439d0448ceSStefano Zampini 
PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 15449d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 15459d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 15469d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 15479d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 15489d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1549c3e1b152SPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 15504b9405b2SPierre Jolivet PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 15519d0448ceSStefano Zampini 15529d0448ceSStefano Zampini if (chol) { 15539d0448ceSStefano Zampini if (r == c) { 15549d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 15559d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 15569d0448ceSStefano Zampini else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 15579d0448ceSStefano Zampini else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1558c3e1b152SPierre Jolivet else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 15594b9405b2SPierre Jolivet else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 15609d0448ceSStefano Zampini } else { 15619d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 15629d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 15639d0448ceSStefano Zampini else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 15649d0448ceSStefano Zampini else if (isMPIBAIJ) 
convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1565c3e1b152SPierre Jolivet else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 15664b9405b2SPierre Jolivet else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 15679d0448ceSStefano Zampini } 15689d0448ceSStefano Zampini } else { 15699d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 15709d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 15719d0448ceSStefano Zampini else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 15729d0448ceSStefano Zampini else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1573c3e1b152SPierre Jolivet else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 15744b9405b2SPierre Jolivet else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 15759d0448ceSStefano Zampini } 15769d0448ceSStefano Zampini PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 15779d0448ceSStefano Zampini mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 15789d0448ceSStefano Zampini PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 15796497c311SBarry Smith cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */ 15809d0448ceSStefano Zampini maxnnz = PetscMax(maxnnz, info.nz_used); 15819d0448ceSStefano Zampini } 15829d0448ceSStefano Zampini } 15839d0448ceSStefano Zampini } 15849d0448ceSStefano Zampini 15859d0448ceSStefano Zampini /* Allocate total COO */ 15869d0448ceSStefano Zampini totnnz = cumnnz; 15879d0448ceSStefano Zampini PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 15889d0448ceSStefano Zampini PetscCall(PetscMalloc1(totnnz, &vals)); 15899d0448ceSStefano Zampini 15909d0448ceSStefano Zampini /* Handle rows and column maps 15919d0448ceSStefano Zampini We directly map rows and 
use an SF for the columns */ 15929d0448ceSStefano Zampini PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 15939d0448ceSStefano Zampini PetscCall(MatNestGetISs(A, rows, cols)); 15949d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 15959d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 15969d0448ceSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 15975d955bbbSStefano Zampini else (void)maxnnz; 15989d0448ceSStefano Zampini 15999d0448ceSStefano Zampini cumnnz = 0; 16009d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 16019d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 16029d0448ceSStefano Zampini Mat sub = mats[r][c]; 16039d0448ceSStefano Zampini const PetscInt *ridx = rows_idx[r]; 16045d955bbbSStefano Zampini const PetscInt *cidx = cols_idx[c]; 160553587d93SPierre Jolivet PetscScalar vscale = 1.0, vshift = 0.0; 160693d70b8aSPierre Jolivet PetscInt rst, size, bs; 16079d0448ceSStefano Zampini PetscSF csf; 160853587d93SPierre Jolivet PetscBool conjugate = PETSC_FALSE, swap = PETSC_FALSE; 16095d955bbbSStefano Zampini PetscLayout cmap; 16106497c311SBarry Smith PetscInt innz; 16119d0448ceSStefano Zampini 16129d0448ceSStefano Zampini mumps->nest_vals_start[r * nc + c] = cumnnz; 161393d70b8aSPierre Jolivet if (c == r) { 161493d70b8aSPierre Jolivet PetscCall(ISGetSize(rows[r], &size)); 161593d70b8aSPierre Jolivet if (!mumps->nest_convert_to_triples[r * nc + c]) { 161693d70b8aSPierre Jolivet for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 161793d70b8aSPierre Jolivet } 161893d70b8aSPierre Jolivet PetscCall(MatGetBlockSize(sub, &bs)); 161993d70b8aSPierre Jolivet Mbs += size / bs; 162093d70b8aSPierre Jolivet } 16219d0448ceSStefano Zampini if 
(!mumps->nest_convert_to_triples[r * nc + c]) continue; 16229d0448ceSStefano Zampini 16235d955bbbSStefano Zampini /* Extract inner blocks if needed */ 162453587d93SPierre Jolivet PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap)); 162553587d93SPierre Jolivet PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 16265d955bbbSStefano Zampini 16275d955bbbSStefano Zampini /* Get column layout to map off-process columns */ 16285d955bbbSStefano Zampini PetscCall(MatGetLayouts(sub, NULL, &cmap)); 16295d955bbbSStefano Zampini 16305d955bbbSStefano Zampini /* Get row start to map on-process rows */ 16315d955bbbSStefano Zampini PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 16325d955bbbSStefano Zampini 16339d0448ceSStefano Zampini /* Directly use the mumps datastructure and use C ordering for now */ 16349d0448ceSStefano Zampini PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 16359d0448ceSStefano Zampini 16365d955bbbSStefano Zampini /* Swap the role of rows and columns indices for transposed blocks 16375d955bbbSStefano Zampini since we need values with global final ordering */ 16385d955bbbSStefano Zampini if (swap) { 16395d955bbbSStefano Zampini cidx = rows_idx[r]; 16405d955bbbSStefano Zampini ridx = cols_idx[c]; 16419d0448ceSStefano Zampini } 16429d0448ceSStefano Zampini 16435d955bbbSStefano Zampini /* Communicate column indices 16445d955bbbSStefano Zampini This could have been done with a single SF but it would have complicated the code a lot. 
16455d955bbbSStefano Zampini But since we do it only once, we pay the price of setting up an SF for each block */ 16465d955bbbSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) { 16475d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1648f4f49eeaSPierre Jolivet } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */ 16499d0448ceSStefano Zampini PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 16506497c311SBarry Smith PetscCall(PetscIntCast(mumps->nnz, &innz)); 16516497c311SBarry Smith PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w)); 16525d955bbbSStefano Zampini PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 16535d955bbbSStefano Zampini PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 16549d0448ceSStefano Zampini PetscCall(PetscSFDestroy(&csf)); 16559d0448ceSStefano Zampini 16565d955bbbSStefano Zampini /* Import indices: use direct map for rows and mapped indices for columns */ 16575d955bbbSStefano Zampini if (swap) { 16585d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) { 16595d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 16605d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 16615d955bbbSStefano Zampini } 16625d955bbbSStefano Zampini } else { 16635d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) { 16645d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 16655d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 16665d955bbbSStefano Zampini } 16675d955bbbSStefano Zampini } 16685d955bbbSStefano Zampini 16695d955bbbSStefano Zampini /* Import values to full COO */ 167053587d93SPierre Jolivet if (conjugate) { /* conjugate the entries 
*/ 167150c845baSStefano Zampini PetscScalar *v = vals + cumnnz; 167253587d93SPierre Jolivet for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]); 167353587d93SPierre Jolivet } else if (vscale != 1.0) { 167453587d93SPierre Jolivet PetscScalar *v = vals + cumnnz; 167553587d93SPierre Jolivet for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k]; 167653587d93SPierre Jolivet } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 16779d0448ceSStefano Zampini 16789d0448ceSStefano Zampini /* Shift new starting point and sanity check */ 16799d0448ceSStefano Zampini cumnnz += mumps->nnz; 16806497c311SBarry Smith PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 16819d0448ceSStefano Zampini 16829d0448ceSStefano Zampini /* Free scratch memory */ 16839d0448ceSStefano Zampini PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 16849d0448ceSStefano Zampini PetscCall(PetscFree(mumps->val_alloc)); 16859d0448ceSStefano Zampini mumps->val = NULL; 16869d0448ceSStefano Zampini mumps->nnz = 0; 16879d0448ceSStefano Zampini } 16889d0448ceSStefano Zampini } 168993d70b8aSPierre Jolivet if (mumps->id.ICNTL(15) == 1) { 169093d70b8aSPierre Jolivet if (Mbs != A->rmap->N) { 169193d70b8aSPierre Jolivet PetscMPIInt rank, size; 169293d70b8aSPierre Jolivet 169393d70b8aSPierre Jolivet PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 169493d70b8aSPierre Jolivet PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 169593d70b8aSPierre Jolivet if (rank == 0) { 169693d70b8aSPierre Jolivet PetscInt shift = 0; 169793d70b8aSPierre Jolivet 169893d70b8aSPierre Jolivet PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk)); 169993d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkptr)); 170093d70b8aSPierre Jolivet PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr)); 170193d70b8aSPierre Jolivet mumps->id.blkptr[0] = 
1; 170293d70b8aSPierre Jolivet for (PetscInt i = 0; i < size; ++i) { 170393d70b8aSPierre Jolivet for (PetscInt r = 0; r < nr; r++) { 170493d70b8aSPierre Jolivet Mat sub = mats[r][r]; 170593d70b8aSPierre Jolivet const PetscInt *ranges; 170693d70b8aSPierre Jolivet PetscInt bs; 170793d70b8aSPierre Jolivet 170893d70b8aSPierre Jolivet for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 170993d70b8aSPierre Jolivet PetscCall(MatGetOwnershipRanges(sub, &ranges)); 171093d70b8aSPierre Jolivet PetscCall(MatGetBlockSize(sub, &bs)); 171193d70b8aSPierre Jolivet for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1)); 171293d70b8aSPierre Jolivet shift += (ranges[i + 1] - ranges[i]) / bs; 171393d70b8aSPierre Jolivet } 171493d70b8aSPierre Jolivet } 171593d70b8aSPierre Jolivet } 171693d70b8aSPierre Jolivet } else mumps->id.ICNTL(15) = 0; 171793d70b8aSPierre Jolivet } 17189d0448ceSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 17199d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 17209d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 17219d0448ceSStefano Zampini PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 17226497c311SBarry Smith if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 17235d955bbbSStefano Zampini mumps->nest_vals_start[nr * nc] = cumnnz; 17249d0448ceSStefano Zampini 17259d0448ceSStefano Zampini /* Set pointers for final MUMPS data structure */ 17269d0448ceSStefano Zampini mumps->nest_vals = vals; 17279d0448ceSStefano Zampini mumps->val_alloc = NULL; /* do 
not use val_alloc since it may be reallocated with the OMP callpath */ 17289d0448ceSStefano Zampini mumps->val = vals; 17299d0448ceSStefano Zampini mumps->irn = irns; 17309d0448ceSStefano Zampini mumps->jcn = jcns; 17319d0448ceSStefano Zampini mumps->nnz = cumnnz; 17329d0448ceSStefano Zampini } else { 17339d0448ceSStefano Zampini PetscScalar *oval = mumps->nest_vals; 17349d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 17359d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 173653587d93SPierre Jolivet PetscBool conjugate = PETSC_FALSE; 17375d955bbbSStefano Zampini Mat sub = mats[r][c]; 173853587d93SPierre Jolivet PetscScalar vscale = 1.0, vshift = 0.0; 17395d955bbbSStefano Zampini PetscInt midx = r * nc + c; 17405d955bbbSStefano Zampini 17415d955bbbSStefano Zampini if (!mumps->nest_convert_to_triples[midx]) continue; 174253587d93SPierre Jolivet PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL)); 174353587d93SPierre Jolivet PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 17445d955bbbSStefano Zampini mumps->val = oval + mumps->nest_vals_start[midx]; 17455d955bbbSStefano Zampini PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 174653587d93SPierre Jolivet if (conjugate) { 17476497c311SBarry Smith PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 174853587d93SPierre Jolivet for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]); 174953587d93SPierre Jolivet } else if (vscale != 1.0) { 175053587d93SPierre Jolivet PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 175153587d93SPierre Jolivet for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale; 17525d955bbbSStefano Zampini } 17539d0448ceSStefano Zampini } 17549d0448ceSStefano Zampini } 17559d0448ceSStefano Zampini mumps->val = oval; 17569d0448ceSStefano Zampini } 
17579d0448ceSStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 17589d0448ceSStefano Zampini } 17599d0448ceSStefano Zampini 176066976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MUMPS(Mat A) 1761d71ae5a4SJacob Faibussowitsch { 1762a6053eceSJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1763b24902e0SBarry Smith 1764397b6df1SKris Buschelman PetscFunctionBegin; 1765cf053153SJunchao Zhang PetscCall(PetscFree(mumps->id.isol_loc)); 17669566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 17679566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 17689566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->b_seq)); 17699566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 17709566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.perm_in)); 177193d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkvar)); 177293d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkptr)); 17739566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 17749566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->val_alloc)); 17759566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->info)); 1776413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 1777413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 17789566063dSJacob Faibussowitsch PetscCall(MatMumpsResetSchur_Private(mumps)); 1779413bcc21SPierre Jolivet if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1780a5e57a09SHong Zhang mumps->id.job = JOB_END; 17813ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 17829261f6e4SBarry Smith PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1783413bcc21SPierre Jolivet if (mumps->mumps_comm != MPI_COMM_NULL) { 1784413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) 
PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1785413bcc21SPierre Jolivet else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1786413bcc21SPierre Jolivet } 1787413bcc21SPierre Jolivet } 1788cf053153SJunchao Zhang PetscCall(MatMumpsFreeInternalID(&mumps->id)); 17893ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 179067602552SJunchao Zhang if (mumps->use_petsc_omp_support) { 17919566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 17929566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 17939566063dSJacob Faibussowitsch PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 179467602552SJunchao Zhang } 17953ab56b82SJunchao Zhang #endif 17969566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 17979566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 17989566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->recvcount)); 17999566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->reqs)); 18009566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->irhs_loc)); 18019d0448ceSStefano Zampini PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 18029d0448ceSStefano Zampini PetscCall(PetscFree(mumps->nest_vals)); 18039566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1804bf0cc555SLisandro Dalcin 180597969023SHong Zhang /* clear composed functions */ 18069566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 18079566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 18089566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 18099566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 18109566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 18119566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 18129566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 18139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 18149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 18159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 18169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 18175c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 18189566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 18199566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 182093d70b8aSPierre Jolivet PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL)); 18213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1822397b6df1SKris Buschelman } 1823397b6df1SKris Buschelman 182467602552SJunchao Zhang /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
*/ 1825d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1826d71ae5a4SJacob Faibussowitsch { 182767602552SJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 182867602552SJunchao Zhang const PetscMPIInt ompsize = mumps->omp_comm_size; 182967602552SJunchao Zhang PetscInt i, m, M, rstart; 183067602552SJunchao Zhang 183167602552SJunchao Zhang PetscFunctionBegin; 18329566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &M, NULL)); 18339566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &m, NULL)); 183408401ef6SPierre Jolivet PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 183567602552SJunchao Zhang if (ompsize == 1) { 183667602552SJunchao Zhang if (!mumps->irhs_loc) { 18376497c311SBarry Smith mumps->nloc_rhs = (PetscMUMPSInt)m; 18389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 18399566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 18406497c311SBarry Smith for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */ 184167602552SJunchao Zhang } 1842cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc)); 184367602552SJunchao Zhang } else { 184467602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 184567602552SJunchao Zhang const PetscInt *ranges; 184667602552SJunchao Zhang PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 184767602552SJunchao Zhang MPI_Group petsc_group, omp_group; 184867602552SJunchao Zhang PetscScalar *recvbuf = NULL; 184967602552SJunchao Zhang 185067602552SJunchao Zhang if (mumps->is_omp_master) { 185167602552SJunchao Zhang /* Lazily initialize the omp stuff for distributed rhs */ 185267602552SJunchao Zhang if (!mumps->irhs_loc) { 18539566063dSJacob Faibussowitsch 
PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 18549566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 18559566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 18569566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 185767602552SJunchao Zhang for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 18589566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 185967602552SJunchao Zhang 186067602552SJunchao Zhang /* Populate mumps->irhs_loc[], rhs_nrow[] */ 186167602552SJunchao Zhang mumps->nloc_rhs = 0; 18629566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(A, &ranges)); 186367602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 186467602552SJunchao Zhang mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 186567602552SJunchao Zhang mumps->nloc_rhs += mumps->rhs_nrow[j]; 186667602552SJunchao Zhang } 18679566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 186867602552SJunchao Zhang for (j = k = 0; j < ompsize; j++) { 1869407b358cSPierre Jolivet for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */ 187067602552SJunchao Zhang } 187167602552SJunchao Zhang 18729566063dSJacob Faibussowitsch PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 18739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&petsc_group)); 18749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&omp_group)); 187567602552SJunchao Zhang } 187667602552SJunchao Zhang 187767602552SJunchao Zhang /* Realloc buffers when current nrhs is bigger than what we have met */ 187867602552SJunchao Zhang if (nrhs > mumps->max_nrhs) { 18799566063dSJacob Faibussowitsch 
PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 18809566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 188167602552SJunchao Zhang mumps->max_nrhs = nrhs; 188267602552SJunchao Zhang } 188367602552SJunchao Zhang 188467602552SJunchao Zhang /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 18859566063dSJacob Faibussowitsch for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 188667602552SJunchao Zhang mumps->rhs_disps[0] = 0; 188767602552SJunchao Zhang for (j = 1; j < ompsize; j++) { 188867602552SJunchao Zhang mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 188908401ef6SPierre Jolivet PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 189067602552SJunchao Zhang } 189167602552SJunchao Zhang recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. 
Single rhs is common in Ax=b */ 189267602552SJunchao Zhang } 189367602552SJunchao Zhang 18949566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 18959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 189667602552SJunchao Zhang 189767602552SJunchao Zhang if (mumps->is_omp_master) { 189867602552SJunchao Zhang if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 189967602552SJunchao Zhang PetscScalar *dst, *dstbase = mumps->rhs_loc; 190067602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 190167602552SJunchao Zhang const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 190267602552SJunchao Zhang dst = dstbase; 190367602552SJunchao Zhang for (i = 0; i < nrhs; i++) { 19049566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 190567602552SJunchao Zhang src += mumps->rhs_nrow[j]; 190667602552SJunchao Zhang dst += mumps->nloc_rhs; 190767602552SJunchao Zhang } 190867602552SJunchao Zhang dstbase += mumps->rhs_nrow[j]; 190967602552SJunchao Zhang } 191067602552SJunchao Zhang } 1911cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc)); 191267602552SJunchao Zhang } 191367602552SJunchao Zhang #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 191467602552SJunchao Zhang } 19156497c311SBarry Smith mumps->id.nrhs = (PetscMUMPSInt)nrhs; 19166497c311SBarry Smith mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs; 191767602552SJunchao Zhang mumps->id.lrhs_loc = mumps->nloc_rhs; 191867602552SJunchao Zhang mumps->id.irhs_loc = mumps->irhs_loc; 19193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 192067602552SJunchao Zhang } 192167602552SJunchao Zhang 192266976f2fSJacob Faibussowitsch static PetscErrorCode 
MatSolve_MUMPS(Mat A, Vec b, Vec x) 1923d71ae5a4SJacob Faibussowitsch { 1924e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1925cf053153SJunchao Zhang const PetscScalar *barray = NULL; 1926d54de34fSKris Buschelman PetscScalar *array; 1927329ec9b3SHong Zhang IS is_iden, is_petsc; 1928329ec9b3SHong Zhang PetscInt i; 1929cc86f929SStefano Zampini PetscBool second_solve = PETSC_FALSE; 1930883f2eb9SBarry Smith static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1931397b6df1SKris Buschelman 1932397b6df1SKris Buschelman PetscFunctionBegin; 19339371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 19349371c9d4SSatish Balay "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 19359371c9d4SSatish Balay &cite1)); 19369371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 19379371c9d4SSatish Balay "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 19389371c9d4SSatish Balay &cite2)); 19392aca8efcSHong Zhang 1940f480ea8aSBarry Smith PetscCall(VecFlag(x, A->factorerrortype)); 1941603e8f96SBarry Smith if (A->factorerrortype) { 19429566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 19433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19442aca8efcSHong Zhang } 19452aca8efcSHong Zhang 1946a5e57a09SHong Zhang mumps->id.nrhs = 1; 19472d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 194825aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 1949cf053153SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */ 1950cf053153SJunchao Zhang PetscCall(VecGetArrayRead(b, &barray)); 1951cf053153SJunchao Zhang PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray)); 195225aac85cSJunchao Zhang } else { 1953cf053153SJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/ 19549566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 19559566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 195667602552SJunchao Zhang if (!mumps->myid) { 19579566063dSJacob Faibussowitsch PetscCall(VecGetArray(mumps->b_seq, &array)); 1958cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 195967602552SJunchao Zhang } 196025aac85cSJunchao Zhang } 1961cf053153SJunchao Zhang } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need 
to bother with isol_loc[] to get the solution */ 1962cf053153SJunchao Zhang mumps->id.ICNTL(20) = 0; 19639566063dSJacob Faibussowitsch PetscCall(VecCopy(b, x)); 19649566063dSJacob Faibussowitsch PetscCall(VecGetArray(x, &array)); 1965cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 1966397b6df1SKris Buschelman } 1967397b6df1SKris Buschelman 1968cc86f929SStefano Zampini /* 1969cc86f929SStefano Zampini handle condensation step of Schur complement (if any) 1970cc86f929SStefano Zampini We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1971cc86f929SStefano Zampini According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1972cc86f929SStefano Zampini Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1973cc86f929SStefano Zampini This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1974cc86f929SStefano Zampini */ 19753e5b40d0SPierre Jolivet if (mumps->id.size_schur > 0) { 197608401ef6SPierre Jolivet PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 19773e5b40d0SPierre Jolivet if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1978cc86f929SStefano Zampini second_solve = PETSC_TRUE; 1979cf053153SJunchao Zhang PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs 19803e5b40d0SPierre Jolivet mumps->id.ICNTL(26) = 1; /* condensation phase */ 19813e5b40d0SPierre Jolivet } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1982cc86f929SStefano Zampini } 1983cf053153SJunchao Zhang 1984a5e57a09SHong Zhang mumps->id.job = JOB_SOLVE; 1985cf053153SJunchao Zhang PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs 1986cf053153SJunchao Zhang 
PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 1987397b6df1SKris Buschelman 1988b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 19891baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1990cf053153SJunchao Zhang else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side 19913e5b40d0SPierre Jolivet PetscCall(MatMumpsSolveSchur_Private(A)); 1992cf053153SJunchao Zhang for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i); 19933e5b40d0SPierre Jolivet } 1994b5fa320bSStefano Zampini 1995f0b74427SPierre Jolivet if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */ 1996a5e57a09SHong Zhang if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1997cf053153SJunchao Zhang /* when id.ICNTL(9) changes, the contents of ilsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 19989566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1999397b6df1SKris Buschelman } 2000a5e57a09SHong Zhang if (!mumps->scat_sol) { /* create scatter scat_sol */ 2001a6053eceSJunchao Zhang PetscInt *isol2_loc = NULL; 20029566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 20039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 2004a6053eceSJunchao Zhang for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 20059566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 20069566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 20079566063dSJacob 
Faibussowitsch PetscCall(ISDestroy(&is_iden)); 20089566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_petsc)); 2009a5e57a09SHong Zhang mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 2010397b6df1SKris Buschelman } 2011a5e57a09SHong Zhang 2012cf053153SJunchao Zhang PetscScalar *xarray; 2013cf053153SJunchao Zhang PetscCall(VecGetArray(mumps->x_seq, &xarray)); 2014cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray)); 2015cf053153SJunchao Zhang PetscCall(VecRestoreArray(mumps->x_seq, &xarray)); 20169566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 20179566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 2018353d7d71SJunchao Zhang 2019cf053153SJunchao Zhang if (mumps->ICNTL20 == 10) { // distributed RHS 2020cf053153SJunchao Zhang PetscCall(VecRestoreArrayRead(b, &barray)); 2021cf053153SJunchao Zhang } else if (!mumps->myid) { // centralized RHS 20229566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mumps->b_seq, &array)); 202325aac85cSJunchao Zhang } 2024cf053153SJunchao Zhang } else { 2025cf053153SJunchao Zhang // id.rhs has the solution in mumps precision 2026cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array)); 2027cf053153SJunchao Zhang PetscCall(VecRestoreArray(x, &array)); 2028cf053153SJunchao Zhang } 2029353d7d71SJunchao Zhang 203064412097SPierre Jolivet PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 20313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2032397b6df1SKris Buschelman } 2033397b6df1SKris Buschelman 203466976f2fSJacob Faibussowitsch static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 2035d71ae5a4SJacob Faibussowitsch { 2036e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2037338d3105SPierre Jolivet const PetscMUMPSInt value = mumps->id.ICNTL(9); 203851d5961aSHong Zhang 203951d5961aSHong Zhang PetscFunctionBegin; 2040a5e57a09SHong Zhang mumps->id.ICNTL(9) = 0; 20419566063dSJacob Faibussowitsch PetscCall(MatSolve_MUMPS(A, b, x)); 2042338d3105SPierre Jolivet mumps->id.ICNTL(9) = value; 20433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 204451d5961aSHong Zhang } 204551d5961aSHong Zhang 204666976f2fSJacob Faibussowitsch static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 2047d71ae5a4SJacob Faibussowitsch { 2048b8491c3eSStefano Zampini Mat Bt = NULL; 2049a6053eceSJunchao Zhang PetscBool denseX, denseB, flg, flgT; 2050e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2051917c3dccSPierre Jolivet PetscInt i, nrhs, M, nrhsM; 20521683a169SBarry Smith PetscScalar *array; 2053cf053153SJunchao Zhang const PetscScalar *barray; 2054a6053eceSJunchao Zhang PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 2055a6053eceSJunchao Zhang PetscMUMPSInt *isol_loc, *isol_loc_save; 2056cf053153SJunchao Zhang PetscScalar *sol_loc; 2057cf053153SJunchao Zhang void *sol_loc_save; 2058cf053153SJunchao Zhang PetscCount sol_loc_len_save; 2059be818407SHong Zhang IS is_to, is_from; 2060beae5ec0SHong Zhang PetscInt k, proc, j, m, myrstart; 2061be818407SHong Zhang const PetscInt *rstart; 206267602552SJunchao Zhang Vec v_mpi, msol_loc; 206367602552SJunchao Zhang VecScatter scat_sol; 206467602552SJunchao Zhang Vec b_seq; 206567602552SJunchao Zhang VecScatter scat_rhs; 2066be818407SHong Zhang PetscScalar *aa; 2067be818407SHong Zhang PetscInt 
spnr, *ia, *ja; 2068d56c302dSHong Zhang Mat_MPIAIJ *b = NULL; 2069bda8bf91SBarry Smith 2070e0b74bf9SHong Zhang PetscFunctionBegin; 20719566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 207228b400f6SJacob Faibussowitsch PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 2073be818407SHong Zhang 20749566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 2075cf053153SJunchao Zhang 2076a6053eceSJunchao Zhang if (denseB) { 207708401ef6SPierre Jolivet PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 2078be818407SHong Zhang mumps->id.ICNTL(20) = 0; /* dense RHS */ 20790e6b8875SHong Zhang } else { /* sparse B */ 208008401ef6SPierre Jolivet PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 2081013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 208253587d93SPierre Jolivet PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 208353587d93SPierre Jolivet PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 208453587d93SPierre Jolivet /* input B is transpose of actual RHS matrix, 20850e6b8875SHong Zhang because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 20869566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(B, &Bt)); 2087be818407SHong Zhang mumps->id.ICNTL(20) = 1; /* sparse RHS */ 2088b8491c3eSStefano Zampini } 208987b22cf4SHong Zhang 20909566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &M, &nrhs)); 2091917c3dccSPierre Jolivet PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); 20926497c311SBarry Smith mumps->id.nrhs = (PetscMUMPSInt)nrhs; 20936497c311SBarry Smith mumps->id.lrhs = (PetscMUMPSInt)M; 20949481e6e9SHong Zhang 2095cf053153SJunchao Zhang if (mumps->petsc_size == 1) { // handle this easy case specially and return early 2096b8491c3eSStefano Zampini PetscScalar *aa; 2097b8491c3eSStefano Zampini PetscInt spnr, *ia, *ja; 2098e94cce23SStefano Zampini PetscBool second_solve = PETSC_FALSE; 2099b8491c3eSStefano Zampini 21009566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 2101a6053eceSJunchao Zhang if (denseB) { 21022b691707SHong Zhang /* copy B to X */ 2103cf053153SJunchao Zhang PetscCall(MatDenseGetArrayRead(B, &barray)); 2104cf053153SJunchao Zhang PetscCall(PetscArraycpy(array, barray, nrhsM)); 2105cf053153SJunchao Zhang PetscCall(MatDenseRestoreArrayRead(B, &barray)); 21062b691707SHong Zhang } else { /* sparse B */ 21079566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Bt, &aa)); 21089566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 210928b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 21109566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2111cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 2112b8491c3eSStefano Zampini } 2113cf053153SJunchao Zhang 
PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 2114cf053153SJunchao Zhang 2115e94cce23SStefano Zampini /* handle condensation step of Schur complement (if any) */ 21163e5b40d0SPierre Jolivet if (mumps->id.size_schur > 0) { 21173e5b40d0SPierre Jolivet if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 2118e94cce23SStefano Zampini second_solve = PETSC_TRUE; 2119cf053153SJunchao Zhang PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs 2120cf053153SJunchao Zhang mumps->id.ICNTL(26) = 1; /* condensation phase, i.e, to solve id.redrhs */ 21213e5b40d0SPierre Jolivet } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 2122e94cce23SStefano Zampini } 2123cf053153SJunchao Zhang 21242cd7d884SHong Zhang mumps->id.job = JOB_SOLVE; 21253ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 2126cf053153SJunchao Zhang PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2127b5fa320bSStefano Zampini 2128b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 21291baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 2130cf053153SJunchao Zhang else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side 21313e5b40d0SPierre Jolivet PetscCall(MatMumpsSolveSchur_Private(A)); 21323e5b40d0SPierre Jolivet for (j = 0; j < nrhs; ++j) 2133cf053153SJunchao Zhang for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs); 21343e5b40d0SPierre Jolivet } 2135cf053153SJunchao Zhang 2136cf053153SJunchao Zhang if (!denseB) { /* sparse B, restore ia, ja */ 21379566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 21389566063dSJacob Faibussowitsch 
PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 213928b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 2140b8491c3eSStefano Zampini } 2141cf053153SJunchao Zhang 2142cf053153SJunchao Zhang // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X. 2143cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array)); 21449566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 21453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2146be818407SHong Zhang } 2147801fbe65SHong Zhang 21482ef1f0ffSBarry Smith /* parallel case: MUMPS requires rhs B to be centralized on the host! */ 214950a7cd33SPierre Jolivet PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 2150241dbb5eSStefano Zampini 2151beae5ec0SHong Zhang /* create msol_loc to hold mumps local solution */ 2152cf053153SJunchao Zhang isol_loc_save = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */ 2153cf053153SJunchao Zhang sol_loc_save = mumps->id.sol_loc; 2154cf053153SJunchao Zhang sol_loc_len_save = mumps->id.sol_loc_len; 2155cf053153SJunchao Zhang mumps->id.isol_loc = NULL; // an init state 2156cf053153SJunchao Zhang mumps->id.sol_loc = NULL; 2157cf053153SJunchao Zhang mumps->id.sol_loc_len = 0; 2158801fbe65SHong Zhang 2159a1dfcbd9SJunchao Zhang lsol_loc = mumps->id.lsol_loc; 2160917c3dccSPierre Jolivet PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */ 21619566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 2162cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 
2163801fbe65SHong Zhang mumps->id.isol_loc = isol_loc; 2164801fbe65SHong Zhang 21659566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 21662cd7d884SHong Zhang 216767602552SJunchao Zhang if (denseB) { 216825aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 216967602552SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 2170cf053153SJunchao Zhang PetscCall(MatDenseGetArrayRead(B, &barray)); 2171cf053153SJunchao Zhang PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc 2172cf053153SJunchao Zhang PetscCall(MatDenseRestoreArrayRead(B, &barray)); 21739566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 2174cf053153SJunchao Zhang PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X 217525aac85cSJunchao Zhang } else { 217625aac85cSJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 217780577c12SJunchao Zhang /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 217880577c12SJunchao Zhang very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 217980577c12SJunchao Zhang 0, re-arrange B into desired order, which is a local operation. 
218080577c12SJunchao Zhang */ 218180577c12SJunchao Zhang 218267602552SJunchao Zhang /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 2183be818407SHong Zhang /* wrap dense rhs matrix B into a vector v_mpi */ 21849566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 2185cf053153SJunchao Zhang PetscCall(MatDenseGetArrayRead(B, &barray)); 2186cf053153SJunchao Zhang PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi)); 2187cf053153SJunchao Zhang PetscCall(MatDenseRestoreArrayRead(B, &barray)); 21882b691707SHong Zhang 2189cf053153SJunchao Zhang /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */ 2190801fbe65SHong Zhang if (!mumps->myid) { 2191beae5ec0SHong Zhang PetscInt *idx; 2192beae5ec0SHong Zhang /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 2193917c3dccSPierre Jolivet PetscCall(PetscMalloc1(nrhsM, &idx)); 21949566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(B, &rstart)); 2195917c3dccSPierre Jolivet for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) { 2196be818407SHong Zhang for (j = 0; j < nrhs; j++) { 2197beae5ec0SHong Zhang for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 2198be818407SHong Zhang } 2199be818407SHong Zhang } 2200be818407SHong Zhang 2201917c3dccSPierre Jolivet PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq)); 2202917c3dccSPierre Jolivet PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to)); 2203917c3dccSPierre Jolivet PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from)); 2204801fbe65SHong Zhang } else { 22059566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 22069566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 22079566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 
2208801fbe65SHong Zhang } 2209cf053153SJunchao Zhang 22109566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 22119566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 22129566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 22139566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 22149566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 2215801fbe65SHong Zhang 2216801fbe65SHong Zhang if (!mumps->myid) { /* define rhs on the host */ 2217cf053153SJunchao Zhang PetscCall(VecGetArrayRead(b_seq, &barray)); 2218cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 2219cf053153SJunchao Zhang PetscCall(VecRestoreArrayRead(b_seq, &barray)); 2220801fbe65SHong Zhang } 222125aac85cSJunchao Zhang } 22222b691707SHong Zhang } else { /* sparse B */ 22232b691707SHong Zhang b = (Mat_MPIAIJ *)Bt->data; 22242b691707SHong Zhang 2225be818407SHong Zhang /* wrap dense X into a vector v_mpi */ 22269566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(X, &m, NULL)); 2227cf053153SJunchao Zhang PetscCall(MatDenseGetArrayRead(X, &barray)); 2228cf053153SJunchao Zhang PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi)); 2229cf053153SJunchao Zhang PetscCall(MatDenseRestoreArrayRead(X, &barray)); 22302b691707SHong Zhang 22312b691707SHong Zhang if (!mumps->myid) { 22329566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(b->A, &aa)); 22339566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 223428b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 22359566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, 
spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2236cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 22372b691707SHong Zhang } else { 22382b691707SHong Zhang mumps->id.irhs_ptr = NULL; 22392b691707SHong Zhang mumps->id.irhs_sparse = NULL; 22402b691707SHong Zhang mumps->id.nz_rhs = 0; 2241cf053153SJunchao Zhang if (mumps->id.rhs_sparse_len) { 2242cf053153SJunchao Zhang PetscCall(PetscFree(mumps->id.rhs_sparse)); 2243cf053153SJunchao Zhang mumps->id.rhs_sparse_len = 0; 2244cf053153SJunchao Zhang } 22452b691707SHong Zhang } 22462b691707SHong Zhang } 22472b691707SHong Zhang 2248801fbe65SHong Zhang /* solve phase */ 2249801fbe65SHong Zhang mumps->id.job = JOB_SOLVE; 22503ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 22519261f6e4SBarry Smith PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2252801fbe65SHong Zhang 2253f0b74427SPierre Jolivet /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */ 22549566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 22559566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(v_mpi, array)); 2256801fbe65SHong Zhang 2257334c5f61SHong Zhang /* create scatter scat_sol */ 22589566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(X, &rstart)); 2259f0b74427SPierre Jolivet /* iidx: index for scatter mumps solution to PETSc X */ 2260beae5ec0SHong Zhang 22619566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 22629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 2263beae5ec0SHong Zhang for (i = 0; i < lsol_loc; i++) { 2264beae5ec0SHong Zhang isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 2265beae5ec0SHong Zhang 22662d4298aeSJunchao Zhang for (proc = 0; proc < mumps->petsc_size; proc++) { 2267beae5ec0SHong Zhang if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 2268beae5ec0SHong Zhang myrstart = rstart[proc]; 2269f0b74427SPierre Jolivet k = isol_loc[i] - myrstart; /* local index on 1st column of PETSc vector X */ 2270f0b74427SPierre Jolivet iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to PETSc index in X */ 2271beae5ec0SHong Zhang m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 2272beae5ec0SHong Zhang break; 2273be818407SHong Zhang } 2274be818407SHong Zhang } 2275be818407SHong Zhang 2276beae5ec0SHong Zhang for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 2277801fbe65SHong Zhang } 22789566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 2279cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[] 22809566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 22819566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 22829566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 22839566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 22849566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 22859566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 228671aed81dSHong Zhang 2287cf053153SJunchao Zhang if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers 2288cf053153SJunchao Zhang mumps->id.sol_loc_len = 0; 2289cf053153SJunchao Zhang PetscCall(PetscFree(mumps->id.sol_loc)); 2290cf053153SJunchao Zhang } 2291cf053153SJunchao Zhang 
2292cf053153SJunchao Zhang // restore old values 2293cf053153SJunchao Zhang mumps->id.sol_loc = sol_loc_save; 2294cf053153SJunchao Zhang mumps->id.sol_loc_len = sol_loc_len_save; 229571aed81dSHong Zhang mumps->id.isol_loc = isol_loc_save; 229671aed81dSHong Zhang 22979566063dSJacob Faibussowitsch PetscCall(PetscFree2(sol_loc, isol_loc)); 22989566063dSJacob Faibussowitsch PetscCall(PetscFree(idxx)); 22999566063dSJacob Faibussowitsch PetscCall(VecDestroy(&msol_loc)); 23009566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v_mpi)); 2301a6053eceSJunchao Zhang if (!denseB) { 23022b691707SHong Zhang if (!mumps->myid) { 2303d56c302dSHong Zhang b = (Mat_MPIAIJ *)Bt->data; 23049566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 23059566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 230628b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 23072b691707SHong Zhang } 23082b691707SHong Zhang } else { 230925aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { 23109566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b_seq)); 23119566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_rhs)); 231225aac85cSJunchao Zhang } 23132b691707SHong Zhang } 23149566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_sol)); 231557508eceSPierre Jolivet PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 23163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2317e0b74bf9SHong Zhang } 2318e0b74bf9SHong Zhang 231966976f2fSJacob Faibussowitsch static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 2320d71ae5a4SJacob Faibussowitsch { 2321b18964edSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2322338d3105SPierre Jolivet const PetscMUMPSInt value = mumps->id.ICNTL(9); 2323b18964edSHong Zhang 2324b18964edSHong Zhang PetscFunctionBegin; 2325b18964edSHong Zhang mumps->id.ICNTL(9) = 0; 2326b18964edSHong Zhang PetscCall(MatMatSolve_MUMPS(A, B, X)); 2327338d3105SPierre Jolivet mumps->id.ICNTL(9) = value; 23283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2329b18964edSHong Zhang } 2330b18964edSHong Zhang 233166976f2fSJacob Faibussowitsch static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 2332d71ae5a4SJacob Faibussowitsch { 2333eb3ef3b2SHong Zhang PetscBool flg; 2334eb3ef3b2SHong Zhang Mat B; 2335eb3ef3b2SHong Zhang 2336eb3ef3b2SHong Zhang PetscFunctionBegin; 23379566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 233828b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 2339eb3ef3b2SHong Zhang 2340eb3ef3b2SHong Zhang /* Create B=Bt^T that uses Bt's data structure */ 23419566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(Bt, &B)); 2342eb3ef3b2SHong Zhang 23439566063dSJacob Faibussowitsch PetscCall(MatMatSolve_MUMPS(A, B, X)); 23449566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 23453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2346eb3ef3b2SHong Zhang } 2347eb3ef3b2SHong Zhang 2348ace3df97SHong Zhang #if !defined(PETSC_USE_COMPLEX) 2349a58c3f20SHong Zhang /* 2350a58c3f20SHong Zhang input: 2351a58c3f20SHong Zhang F: numeric factor 2352a58c3f20SHong Zhang 
output: 2353a58c3f20SHong Zhang nneg: total number of negative pivots 235419d49a3bSHong Zhang nzero: total number of zero pivots 235519d49a3bSHong Zhang npos: (global dimension of F) - nneg - nzero 2356a58c3f20SHong Zhang */ 235766976f2fSJacob Faibussowitsch static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 2358d71ae5a4SJacob Faibussowitsch { 2359e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2360c1490034SHong Zhang PetscMPIInt size; 2361a58c3f20SHong Zhang 2362a58c3f20SHong Zhang PetscFunctionBegin; 23639566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 2364bcb30aebSHong Zhang /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 2365aed4548fSBarry Smith PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13)); /* the message prints the ICNTL(13) value that the check tested */ 2366ed85ac9fSHong Zhang 2367710ac8efSHong Zhang if (nneg) *nneg = mumps->id.INFOG(12); 2368ed85ac9fSHong Zhang if (nzero || npos) { 236908401ef6SPierre Jolivet PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2370710ac8efSHong Zhang if (nzero) *nzero = mumps->id.INFOG(28); 2371710ac8efSHong Zhang if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 2372a58c3f20SHong Zhang } 23733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2374a58c3f20SHong Zhang } 237519d49a3bSHong Zhang #endif 2376a58c3f20SHong Zhang 237766976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 2378d71ae5a4SJacob Faibussowitsch { 23796497c311SBarry Smith PetscMPIInt nreqs; 2380a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; 2381a6053eceSJunchao Zhang PetscMPIInt count; 23826497c311SBarry Smith PetscCount totnnz, remain; 2383a6053eceSJunchao Zhang const PetscInt osize = mumps->omp_comm_size; 2384a6053eceSJunchao Zhang PetscScalar *val; 23853ab56b82SJunchao Zhang 23863ab56b82SJunchao Zhang PetscFunctionBegin; 2387a6053eceSJunchao Zhang if (osize > 1) { 23883ab56b82SJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) { 23893ab56b82SJunchao Zhang /* master first gathers counts of nonzeros to receive */ 23909566063dSJacob Faibussowitsch if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 23919566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 23923ab56b82SJunchao Zhang 2393a6053eceSJunchao Zhang /* Then each computes number of send/recvs */ 23943ab56b82SJunchao Zhang if (mumps->is_omp_master) { 2395a6053eceSJunchao Zhang /* Start from 1 since self communication is not done in MPI */ 
2396a6053eceSJunchao Zhang nreqs = 0; 23976497c311SBarry Smith for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 2398a6053eceSJunchao Zhang } else { 23996497c311SBarry Smith nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX)); 24003ab56b82SJunchao Zhang } 240135cb6cd3SPierre Jolivet PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 24023ab56b82SJunchao Zhang 2403a6053eceSJunchao Zhang /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 2404a6053eceSJunchao Zhang MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 2405a6053eceSJunchao Zhang might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 2406a6053eceSJunchao Zhang is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
2407a6053eceSJunchao Zhang */ 2408a6053eceSJunchao Zhang nreqs = 0; /* counter for actual send/recvs */ 24093ab56b82SJunchao Zhang if (mumps->is_omp_master) { 24106497c311SBarry Smith totnnz = 0; 24116497c311SBarry Smith 24126497c311SBarry Smith for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 24139566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 24149566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(totnnz, &val)); 2415a6053eceSJunchao Zhang 2416a6053eceSJunchao Zhang /* Self communication */ 24179566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 24189566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 24199566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 2420a6053eceSJunchao Zhang 2421a6053eceSJunchao Zhang /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 24229566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 24239566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->val_alloc)); 2424a6053eceSJunchao Zhang mumps->nnz = totnnz; 24253ab56b82SJunchao Zhang mumps->irn = irn; 24263ab56b82SJunchao Zhang mumps->jcn = jcn; 2427a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 2428a6053eceSJunchao Zhang 2429a6053eceSJunchao Zhang irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 2430a6053eceSJunchao Zhang jcn += mumps->recvcount[0]; 2431a6053eceSJunchao Zhang val += mumps->recvcount[0]; 2432a6053eceSJunchao Zhang 2433a6053eceSJunchao Zhang /* Remote communication */ 24346497c311SBarry Smith for (PetscMPIInt i = 1; i < osize; i++) { 24356497c311SBarry Smith count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2436a6053eceSJunchao Zhang remain = mumps->recvcount[i] - count; 2437a6053eceSJunchao Zhang while (count > 0) { 24386497c311SBarry Smith 
PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 24396497c311SBarry Smith PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 24406497c311SBarry Smith PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2441a6053eceSJunchao Zhang irn += count; 2442a6053eceSJunchao Zhang jcn += count; 2443a6053eceSJunchao Zhang val += count; 24446497c311SBarry Smith count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2445a6053eceSJunchao Zhang remain -= count; 2446a6053eceSJunchao Zhang } 24473ab56b82SJunchao Zhang } 24483ab56b82SJunchao Zhang } else { 2449a6053eceSJunchao Zhang irn = mumps->irn; 2450a6053eceSJunchao Zhang jcn = mumps->jcn; 2451a6053eceSJunchao Zhang val = mumps->val; 24526497c311SBarry Smith count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2453a6053eceSJunchao Zhang remain = mumps->nnz - count; 2454a6053eceSJunchao Zhang while (count > 0) { 24556497c311SBarry Smith PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 24566497c311SBarry Smith PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 24576497c311SBarry Smith PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2458a6053eceSJunchao Zhang irn += count; 2459a6053eceSJunchao Zhang jcn += count; 2460a6053eceSJunchao Zhang val += count; 24616497c311SBarry Smith count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2462a6053eceSJunchao Zhang remain -= count; 24633ab56b82SJunchao Zhang } 24643ab56b82SJunchao Zhang } 2465a6053eceSJunchao Zhang } else { 2466a6053eceSJunchao Zhang nreqs = 0; 2467a6053eceSJunchao Zhang if (mumps->is_omp_master) { 2468a6053eceSJunchao Zhang val = mumps->val + mumps->recvcount[0]; 24696497c311SBarry 
Smith for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 24706497c311SBarry Smith count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2471a6053eceSJunchao Zhang remain = mumps->recvcount[i] - count; 2472a6053eceSJunchao Zhang while (count > 0) { 24736497c311SBarry Smith PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2474a6053eceSJunchao Zhang val += count; 24756497c311SBarry Smith count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2476a6053eceSJunchao Zhang remain -= count; 2477a6053eceSJunchao Zhang } 2478a6053eceSJunchao Zhang } 2479a6053eceSJunchao Zhang } else { 2480a6053eceSJunchao Zhang val = mumps->val; 24816497c311SBarry Smith count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2482a6053eceSJunchao Zhang remain = mumps->nnz - count; 2483a6053eceSJunchao Zhang while (count > 0) { 24846497c311SBarry Smith PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2485a6053eceSJunchao Zhang val += count; 24866497c311SBarry Smith count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2487a6053eceSJunchao Zhang remain -= count; 2488a6053eceSJunchao Zhang } 2489a6053eceSJunchao Zhang } 2490a6053eceSJunchao Zhang } 24919566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 2492a6053eceSJunchao Zhang mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 2493a6053eceSJunchao Zhang } 24943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24953ab56b82SJunchao Zhang } 24963ab56b82SJunchao Zhang 2497d2a308c1SPierre Jolivet static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info) 2498d71ae5a4SJacob Faibussowitsch { 249957508eceSPierre Jolivet Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 
2500397b6df1SKris Buschelman 2501397b6df1SKris Buschelman PetscFunctionBegin; 2502dbf6bb8dSprj- if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 250348a46eb9SPierre Jolivet if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 25049566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 25053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25062aca8efcSHong Zhang } 25076baea169SHong Zhang 25089566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 25099566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2510397b6df1SKris Buschelman 2511397b6df1SKris Buschelman /* numerical factorization phase */ 2512a5e57a09SHong Zhang mumps->id.job = JOB_FACTNUMERIC; 25134e34a73bSHong Zhang if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2514cf053153SJunchao Zhang if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2515397b6df1SKris Buschelman } else { 2516cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2517397b6df1SKris Buschelman } 2518cf053153SJunchao Zhang 2519cf053153SJunchao Zhang if (F->schur) { 2520cf053153SJunchao Zhang const PetscScalar *array; 2521cf053153SJunchao Zhang MUMPS_INT size = mumps->id.size_schur; 2522cf053153SJunchao Zhang PetscCall(MatDenseGetArrayRead(F->schur, &array)); 2523cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur)); 2524cf053153SJunchao Zhang 
PetscCall(MatDenseRestoreArrayRead(F->schur, &array)); 2525cf053153SJunchao Zhang } 2526cf053153SJunchao Zhang 25273ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 2528a5e57a09SHong Zhang if (mumps->id.INFOG(1) < 0) { 25299261f6e4SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 25309261f6e4SBarry Smith if (mumps->id.INFOG(1) == -10) { 25319261f6e4SBarry Smith PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2532603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2533c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -13) { 25349261f6e4SBarry Smith PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2535603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2536c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2537bdcd51b8SPierre Jolivet PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2538603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 25392aca8efcSHong Zhang } else { 25409261f6e4SBarry Smith PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2541603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 2542151787a6SHong Zhang } 25432aca8efcSHong Zhang } 25449261f6e4SBarry Smith PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 
2545397b6df1SKris Buschelman 2546b3cb21ddSStefano Zampini F->assembled = PETSC_TRUE; 2547d47f36abSHong Zhang 2548b3cb21ddSStefano Zampini if (F->schur) { /* reset Schur status to unfactored */ 25493cb7dd0eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 2550c70f7ee4SJunchao Zhang F->schur->offloadmask = PETSC_OFFLOAD_CPU; 25513cb7dd0eSStefano Zampini #endif 2552cf053153SJunchao Zhang PetscScalar *array; 2553cf053153SJunchao Zhang PetscCall(MatDenseGetArray(F->schur, &array)); 2554cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array)); 2555cf053153SJunchao Zhang PetscCall(MatDenseRestoreArray(F->schur, &array)); 2556b3cb21ddSStefano Zampini if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2557b3cb21ddSStefano Zampini mumps->id.ICNTL(19) = 2; 25589566063dSJacob Faibussowitsch PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2559b3cb21ddSStefano Zampini } 25609566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2561b3cb21ddSStefano Zampini } 256267877ebaSShri Abhyankar 2563066565c5SStefano Zampini /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2564066565c5SStefano Zampini if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2565066565c5SStefano Zampini 25663ab56b82SJunchao Zhang if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2567cf053153SJunchao Zhang // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the 2568cf053153SJunchao Zhang // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for 2569cf053153SJunchao Zhang // which the solution (in SOL_loc) is available on the local processor. 
2570cf053153SJunchao Zhang // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will 2571cf053153SJunchao Zhang // normally have the same contents for each of these calls. The only exception is the case of 2572cf053153SJunchao Zhang // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non 2573cf053153SJunchao Zhang // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase. 25742d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 257567877ebaSShri Abhyankar PetscInt lsol_loc; 2576cf053153SJunchao Zhang PetscScalar *array; 2577c2093ab7SHong Zhang 2578c2093ab7SHong Zhang /* distributed solution; Create x_seq=sol_loc for repeated use */ 2579c2093ab7SHong Zhang if (mumps->x_seq) { 25809566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2581cf053153SJunchao Zhang PetscCall(PetscFree(mumps->id.isol_loc)); 25829566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 2583c2093ab7SHong Zhang } 2584a5e57a09SHong Zhang lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2585cf053153SJunchao Zhang PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc)); 2586cf053153SJunchao Zhang PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq)); 2587cf053153SJunchao Zhang PetscCall(VecGetArray(mumps->x_seq, &array)); 2588cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 2589cf053153SJunchao Zhang PetscCall(VecRestoreArray(mumps->x_seq, &array)); 25906497c311SBarry Smith mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc; 259167877ebaSShri Abhyankar } 2592cf053153SJunchao Zhang PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2))); 25933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2594397b6df1SKris Buschelman } 2595397b6df1SKris Buschelman 25969a2535b5SHong Zhang /* Sets MUMPS options from the options database 
*/ 259766976f2fSJacob Faibussowitsch static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2598d71ae5a4SJacob Faibussowitsch { 2599e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2600cf053153SJunchao Zhang PetscReal cntl; 2601413bcc21SPierre Jolivet PetscMUMPSInt icntl = 0, size, *listvar_schur; 260245e3843bSPierre Jolivet PetscInt info[80], i, ninfo = 80, rbs, cbs; 2603cf053153SJunchao Zhang PetscBool flg = PETSC_FALSE; 2604cf053153SJunchao Zhang PetscBool schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1); 2605cf053153SJunchao Zhang void *arr; 2606dcd589f8SShri Abhyankar 2607dcd589f8SShri Abhyankar PetscFunctionBegin; 260826cc229bSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2609413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2610cf053153SJunchao Zhang PetscPrecision precision = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE; 2611413bcc21SPierre Jolivet PetscInt nthreads = 0; 2612413bcc21SPierre Jolivet PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2613413bcc21SPierre Jolivet PetscInt nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 261493d70b8aSPierre Jolivet PetscMUMPSInt nblk, *blkvar, *blkptr; 2615413bcc21SPierre Jolivet 2616413bcc21SPierre Jolivet mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2617413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2618413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2619413bcc21SPierre Jolivet 2620413bcc21SPierre Jolivet PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2621413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2622413bcc21SPierre Jolivet /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2623413bcc21SPierre Jolivet PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2624413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) { 2625413bcc21SPierre Jolivet PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2626413bcc21SPierre Jolivet #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2627413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2628413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2629ea17275aSJose E. Roman #else 2630ea17275aSJose E. Roman SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. 
Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2631ea17275aSJose E. Roman ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2632413bcc21SPierre Jolivet #endif 2633413bcc21SPierre Jolivet } else { 2634413bcc21SPierre Jolivet mumps->omp_comm = PETSC_COMM_SELF; 2635413bcc21SPierre Jolivet mumps->mumps_comm = mumps->petsc_comm; 2636413bcc21SPierre Jolivet mumps->is_omp_master = PETSC_TRUE; 2637413bcc21SPierre Jolivet } 2638413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2639413bcc21SPierre Jolivet mumps->reqs = NULL; 2640413bcc21SPierre Jolivet mumps->tag = 0; 2641413bcc21SPierre Jolivet 2642413bcc21SPierre Jolivet if (mumps->mumps_comm != MPI_COMM_NULL) { 2643413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2644413bcc21SPierre Jolivet /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 2645413bcc21SPierre Jolivet MPI_Comm comm; 2646413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2647413bcc21SPierre Jolivet mumps->mumps_comm = comm; 2648413bcc21SPierre Jolivet } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2649413bcc21SPierre Jolivet } 2650413bcc21SPierre Jolivet 2651413bcc21SPierre Jolivet mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2652413bcc21SPierre Jolivet mumps->id.job = JOB_INIT; 2653413bcc21SPierre Jolivet mumps->id.par = 1; /* host participates factorizaton and solve */ 2654413bcc21SPierre Jolivet mumps->id.sym = mumps->sym; 2655413bcc21SPierre Jolivet 2656413bcc21SPierre Jolivet size = mumps->id.size_schur; 2657413bcc21SPierre Jolivet arr = mumps->id.schur; 2658413bcc21SPierre Jolivet listvar_schur = mumps->id.listvar_schur; 265993d70b8aSPierre Jolivet nblk = mumps->id.nblk; 266093d70b8aSPierre Jolivet blkvar = mumps->id.blkvar; 266193d70b8aSPierre Jolivet blkptr = mumps->id.blkptr; 266296eb7ee0SStefano Zampini if (PetscDefined(USE_DEBUG)) { 266396eb7ee0SStefano Zampini for (PetscInt i = 0; i < size; i++) 266496eb7ee0SStefano Zampini PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! 
%" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1, 266596eb7ee0SStefano Zampini A->rmap->N); 266696eb7ee0SStefano Zampini } 266796eb7ee0SStefano Zampini 2668cf053153SJunchao Zhang PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL)); 2669cf053153SJunchao Zhang PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]); 2670cf053153SJunchao Zhang PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar"); 2671cf053153SJunchao Zhang PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision)); 2672cf053153SJunchao Zhang 2673413bcc21SPierre Jolivet PetscMUMPS_c(mumps); 26749261f6e4SBarry Smith PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 267551ad14ebSPierre Jolivet 267651ad14ebSPierre Jolivet /* set PETSc-MUMPS default options - override MUMPS default */ 267751ad14ebSPierre Jolivet mumps->id.ICNTL(3) = 0; 267851ad14ebSPierre Jolivet mumps->id.ICNTL(4) = 0; 267951ad14ebSPierre Jolivet if (mumps->petsc_size == 1) { 268051ad14ebSPierre Jolivet mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 268151ad14ebSPierre Jolivet mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 268251ad14ebSPierre Jolivet } else { 268351ad14ebSPierre Jolivet mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 268451ad14ebSPierre Jolivet mumps->id.ICNTL(21) = 1; /* distributed solution */ 268551ad14ebSPierre Jolivet } 268693d70b8aSPierre Jolivet if (nblk && blkptr) { 
268793d70b8aSPierre Jolivet mumps->id.ICNTL(15) = 1; 268893d70b8aSPierre Jolivet mumps->id.nblk = nblk; 268993d70b8aSPierre Jolivet mumps->id.blkvar = blkvar; 269093d70b8aSPierre Jolivet mumps->id.blkptr = blkptr; 2691cf053153SJunchao Zhang } else mumps->id.ICNTL(15) = 0; 269251ad14ebSPierre Jolivet 2693413bcc21SPierre Jolivet /* restore cached ICNTL and CNTL values */ 2694413bcc21SPierre Jolivet for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2695cf053153SJunchao Zhang for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]); 2696cf053153SJunchao Zhang 2697413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 2698413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 2699413bcc21SPierre Jolivet 2700413bcc21SPierre Jolivet if (schur) { 2701413bcc21SPierre Jolivet mumps->id.size_schur = size; 2702413bcc21SPierre Jolivet mumps->id.schur_lld = size; 2703413bcc21SPierre Jolivet mumps->id.schur = arr; 2704413bcc21SPierre Jolivet mumps->id.listvar_schur = listvar_schur; 2705413bcc21SPierre Jolivet if (mumps->petsc_size > 1) { 2706413bcc21SPierre Jolivet PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2707413bcc21SPierre Jolivet 2708413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2709413bcc21SPierre Jolivet gs = mumps->myid ? (mumps->id.size_schur ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 27105440e5dcSBarry Smith PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm)); 2711413bcc21SPierre Jolivet PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2712413bcc21SPierre Jolivet } else { 2713413bcc21SPierre Jolivet if (F->factortype == MAT_FACTOR_LU) { 2714413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2715413bcc21SPierre Jolivet } else { 2716413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2717413bcc21SPierre Jolivet } 2718413bcc21SPierre Jolivet } 2719413bcc21SPierre Jolivet mumps->id.ICNTL(26) = -1; 2720413bcc21SPierre Jolivet } 2721413bcc21SPierre Jolivet 2722413bcc21SPierre Jolivet /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2723413bcc21SPierre Jolivet For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
2724413bcc21SPierre Jolivet */ 2725413bcc21SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2726cf053153SJunchao Zhang PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); 2727413bcc21SPierre Jolivet 2728413bcc21SPierre Jolivet mumps->scat_rhs = NULL; 2729413bcc21SPierre Jolivet mumps->scat_sol = NULL; 2730413bcc21SPierre Jolivet } 27319566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 27329a2535b5SHong Zhang if (flg) mumps->id.ICNTL(1) = icntl; 27339566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 27349a2535b5SHong Zhang if (flg) mumps->id.ICNTL(2) = icntl; 27359566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 27369a2535b5SHong Zhang if (flg) mumps->id.ICNTL(3) = icntl; 2737dcd589f8SShri Abhyankar 27389566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 27399a2535b5SHong Zhang if (flg) mumps->id.ICNTL(4) = icntl; 27409a2535b5SHong Zhang if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 27419a2535b5SHong Zhang 27429566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 27439a2535b5SHong Zhang if (flg) mumps->id.ICNTL(6) = icntl; 27449a2535b5SHong Zhang 27459566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): 
computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2746dcd589f8SShri Abhyankar if (flg) { 2747aed4548fSBarry Smith PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2748b53c1a7fSBarry Smith mumps->id.ICNTL(7) = icntl; 2749dcd589f8SShri Abhyankar } 2750e0b74bf9SHong Zhang 27519566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 27529566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 27539566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 27549566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 27559566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 27569566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 27579566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 
275845e3843bSPierre Jolivet PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 27596497c311SBarry Smith if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs; 276045e3843bSPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 276145e3843bSPierre Jolivet if (flg) { 276293d70b8aSPierre Jolivet if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 276393d70b8aSPierre Jolivet else if (mumps->id.ICNTL(15) > 0) { 276493d70b8aSPierre Jolivet const PetscInt *bsizes; 276593d70b8aSPierre Jolivet PetscInt nblocks, p, *blkptr = NULL; 276693d70b8aSPierre Jolivet PetscMPIInt *recvcounts, *displs, n; 276793d70b8aSPierre Jolivet PetscMPIInt rank, size = 0; 276893d70b8aSPierre Jolivet 276993d70b8aSPierre Jolivet PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes)); 277093d70b8aSPierre Jolivet flg = PETSC_TRUE; 277193d70b8aSPierre Jolivet for (p = 0; p < nblocks; ++p) { 277293d70b8aSPierre Jolivet if (bsizes[p] > 1) break; 277393d70b8aSPierre Jolivet } 277493d70b8aSPierre Jolivet if (p == nblocks) flg = PETSC_FALSE; 27755440e5dcSBarry Smith PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 277693d70b8aSPierre Jolivet if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1 277793d70b8aSPierre Jolivet PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 277893d70b8aSPierre Jolivet if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 277993d70b8aSPierre Jolivet PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs)); 278093d70b8aSPierre Jolivet PetscCall(PetscMPIIntCast(nblocks, &n)); 
278193d70b8aSPierre Jolivet PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); 278293d70b8aSPierre Jolivet for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p]; 278393d70b8aSPierre Jolivet PetscCall(PetscMalloc1(displs[size] + 1, &blkptr)); 278493d70b8aSPierre Jolivet PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 278593d70b8aSPierre Jolivet PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 278693d70b8aSPierre Jolivet if (rank == 0) { 278793d70b8aSPierre Jolivet blkptr[0] = 1; 278893d70b8aSPierre Jolivet for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p]; 278993d70b8aSPierre Jolivet PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr)); 279093d70b8aSPierre Jolivet } 279193d70b8aSPierre Jolivet PetscCall(PetscFree2(recvcounts, displs)); 279293d70b8aSPierre Jolivet PetscCall(PetscFree(blkptr)); 279393d70b8aSPierre Jolivet } 279493d70b8aSPierre Jolivet } 279545e3843bSPierre Jolivet } 27969566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 279759ac8732SStefano Zampini if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 27989566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F->schur)); 27999566063dSJacob Faibussowitsch PetscCall(MatMumpsResetSchur_Private(mumps)); 280059ac8732SStefano Zampini } 280125aac85cSJunchao Zhang 280243f3b051SJunchao Zhang /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 280343f3b051SJunchao Zhang and was reported by Firedrake. 
See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 280425aac85cSJunchao Zhang and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 280543f3b051SJunchao Zhang This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 280643f3b051SJunchao Zhang see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 280741caa250SJunchao Zhang In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 280825aac85cSJunchao Zhang */ 2809c183326eSPierre Jolivet mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */ 2810c183326eSPierre Jolivet #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI) 2811c183326eSPierre Jolivet mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */ 281225aac85cSJunchao Zhang #endif 28139566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2814aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10", (int)mumps->ICNTL20); 281525aac85cSJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2816aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 281725aac85cSJunchao Zhang #endif 28189566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 28199a2535b5SHong Zhang 28209566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 28219566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 28229566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2823ac530a7eSPierre Jolivet if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 2824d7ebd59bSHong Zhang 28259566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 28269566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 28279566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple 
right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2828fa6fd9d0SPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 28299566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 28309566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 28319566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2832145b44c9SPierre Jolivet /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 28339566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 28349566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 28359566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 283650ea2040Saszaboa PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks 
(CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL)); 28379566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2838c92b4f89SPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL)); 283991b026caSPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL)); 2840146931dbSPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2841dcd589f8SShri Abhyankar 2842cf053153SJunchao Zhang PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg)); 2843cf053153SJunchao Zhang if (flg) ID_CNTL_SET(mumps->id, 1, cntl); 2844cf053153SJunchao Zhang PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg)); 2845cf053153SJunchao Zhang if (flg) ID_CNTL_SET(mumps->id, 2, cntl); 2846cf053153SJunchao Zhang PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg)); 2847cf053153SJunchao Zhang if (flg) ID_CNTL_SET(mumps->id, 3, cntl); 2848cf053153SJunchao Zhang PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg)); 2849cf053153SJunchao Zhang if (flg) ID_CNTL_SET(mumps->id, 4, cntl); 2850cf053153SJunchao Zhang PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", 
(PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg)); 2851cf053153SJunchao Zhang if (flg) ID_CNTL_SET(mumps->id, 5, cntl); 2852cf053153SJunchao Zhang PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg)); 2853cf053153SJunchao Zhang if (flg) ID_CNTL_SET(mumps->id, 7, cntl); 2854e5bb22a1SHong Zhang 28559566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2856b34f08ffSHong Zhang 28579566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2858b34f08ffSHong Zhang if (ninfo) { 285908401ef6SPierre Jolivet PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 28609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2861b34f08ffSHong Zhang mumps->ninfo = ninfo; 2862b34f08ffSHong Zhang for (i = 0; i < ninfo; i++) { 2863aed4548fSBarry Smith PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2864f7d195e4SLawrence Mitchell mumps->info[i] = info[i]; 2865b34f08ffSHong Zhang } 2866b34f08ffSHong Zhang } 2867d0609cedSBarry Smith PetscOptionsEnd(); 28683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2869dcd589f8SShri Abhyankar } 2870dcd589f8SShri Abhyankar 2871d2a308c1SPierre Jolivet static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2872d71ae5a4SJacob Faibussowitsch { 28735cd7cf9dSHong Zhang PetscFunctionBegin; 28745cd7cf9dSHong Zhang if (mumps->id.INFOG(1) < 0) { 28759261f6e4SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: 
INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 28765cd7cf9dSHong Zhang if (mumps->id.INFOG(1) == -6) { 28779261f6e4SBarry Smith PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2878603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 28795cd7cf9dSHong Zhang } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 28809261f6e4SBarry Smith PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2881603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 28825cd7cf9dSHong Zhang } else { 28839261f6e4SBarry Smith PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2884603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 28855cd7cf9dSHong Zhang } 28865cd7cf9dSHong Zhang } 288772b150d8SStefano Zampini if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 28883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28895cd7cf9dSHong Zhang } 28905cd7cf9dSHong Zhang 2891d2a308c1SPierre Jolivet static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2892d71ae5a4SJacob Faibussowitsch { 2893e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 289467877ebaSShri Abhyankar Vec b; 289567877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2896397b6df1SKris Buschelman 2897397b6df1SKris Buschelman PetscFunctionBegin; 2898d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2899d47f36abSHong Zhang /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 29003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2901d47f36abSHong Zhang } 2902dcd589f8SShri Abhyankar 29039a2535b5SHong Zhang /* Set MUMPS options from the options database */ 290426cc229bSBarry Smith 
PetscCall(MatSetFromOptions_MUMPS(F, A)); 2905dcd589f8SShri Abhyankar 29069566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 29079566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2908dcd589f8SShri Abhyankar 290967877ebaSShri Abhyankar /* analysis phase */ 2910a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 29116497c311SBarry Smith PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2912a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 291367877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2914a5e57a09SHong Zhang if (!mumps->myid) { 2915a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2916a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2917a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2918cf053153SJunchao Zhang if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 291951ad14ebSPierre Jolivet if (r && mumps->id.ICNTL(7) == 7) { 29204ac6704cSBarry Smith mumps->id.ICNTL(7) = 1; 2921a5e57a09SHong Zhang if (!mumps->myid) { 2922e0b74bf9SHong Zhang const PetscInt *idx; 2923a6053eceSJunchao Zhang PetscInt i; 29242205254eSKarl Rupp 29259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 29269566063dSJacob Faibussowitsch PetscCall(ISGetIndices(r, &idx)); 2927f4f49eeaSPierre Jolivet for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! 
*/ 29289566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(r, &idx)); 2929e0b74bf9SHong Zhang } 2930e0b74bf9SHong Zhang } 293167877ebaSShri Abhyankar } 293267877ebaSShri Abhyankar break; 293367877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2934a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2935a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2936a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2937cf053153SJunchao Zhang if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 293825aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 29399566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 29409566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 29419566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 294225aac85cSJunchao Zhang } 294367877ebaSShri Abhyankar break; 294467877ebaSShri Abhyankar } 29453ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 29469566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 294767877ebaSShri Abhyankar 2948719d5645SBarry Smith F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2949dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 295051d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 29514e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 2952eb3ef3b2SHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2953b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2954d47f36abSHong Zhang 2955d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 29563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2957b24902e0SBarry Smith } 2958b24902e0SBarry Smith 2959f0b74427SPierre Jolivet /* Note the 
PETSc r and c permutations are ignored */ 2960d2a308c1SPierre Jolivet static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2961d71ae5a4SJacob Faibussowitsch { 2962e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 296367877ebaSShri Abhyankar Vec b; 296467877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2965450b117fSShri Abhyankar 2966450b117fSShri Abhyankar PetscFunctionBegin; 2967d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2968338d3105SPierre Jolivet /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 29693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2970d47f36abSHong Zhang } 2971dcd589f8SShri Abhyankar 29729a2535b5SHong Zhang /* Set MUMPS options from the options database */ 297326cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2974dcd589f8SShri Abhyankar 29759566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 29769566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 297767877ebaSShri Abhyankar 297867877ebaSShri Abhyankar /* analysis phase */ 2979a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 29806497c311SBarry Smith PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2981a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 298267877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2983a5e57a09SHong Zhang if (!mumps->myid) { 2984a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2985a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2986a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2987cf053153SJunchao Zhang if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 298867877ebaSShri Abhyankar } 298967877ebaSShri Abhyankar break; 299067877ebaSShri 
Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2991a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2992a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2993a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2994cf053153SJunchao Zhang if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 299525aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 29969566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 29979566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 29989566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 299925aac85cSJunchao Zhang } 300067877ebaSShri Abhyankar break; 300167877ebaSShri Abhyankar } 30023ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 30039566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 300467877ebaSShri Abhyankar 3005450b117fSShri Abhyankar F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 3006dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 300751d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 3008b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 3009d47f36abSHong Zhang 3010d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 30113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3012450b117fSShri Abhyankar } 3013b24902e0SBarry Smith 3014f0b74427SPierre Jolivet /* Note the PETSc r permutation and factor info are ignored */ 3015d2a308c1SPierre Jolivet static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info) 3016d71ae5a4SJacob Faibussowitsch { 3017e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 301867877ebaSShri Abhyankar Vec b; 
301967877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 3020397b6df1SKris Buschelman 3021397b6df1SKris Buschelman PetscFunctionBegin; 3022d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 3023338d3105SPierre Jolivet /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 30243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3025d47f36abSHong Zhang } 3026dcd589f8SShri Abhyankar 30279a2535b5SHong Zhang /* Set MUMPS options from the options database */ 302826cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 3029dcd589f8SShri Abhyankar 30309566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 30319566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 3032dcd589f8SShri Abhyankar 303367877ebaSShri Abhyankar /* analysis phase */ 3034a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 30356497c311SBarry Smith PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 3036a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 303767877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 3038a5e57a09SHong Zhang if (!mumps->myid) { 3039a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 3040a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 3041a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 3042cf053153SJunchao Zhang if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 304367877ebaSShri Abhyankar } 304467877ebaSShri Abhyankar break; 304567877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 3046a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 3047a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 3048a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 3049cf053153SJunchao Zhang if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) 
PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 305025aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 30519566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 30529566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 30539566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 305425aac85cSJunchao Zhang } 305567877ebaSShri Abhyankar break; 305667877ebaSShri Abhyankar } 30573ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 30589566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 30595cd7cf9dSHong Zhang 30602792810eSHong Zhang F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 3061dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 306251d5961aSHong Zhang F->ops->solvetranspose = MatSolve_MUMPS; 30634e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 306423a5080aSHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 3065b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 30664e34a73bSHong Zhang #if defined(PETSC_USE_COMPLEX) 30670298fd71SBarry Smith F->ops->getinertia = NULL; 30684e34a73bSHong Zhang #else 30694e34a73bSHong Zhang F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 3070db4efbfdSBarry Smith #endif 3071d47f36abSHong Zhang 3072d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 30733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3074b24902e0SBarry Smith } 3075b24902e0SBarry Smith 307666976f2fSJacob Faibussowitsch static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 3077d71ae5a4SJacob Faibussowitsch { 30789f196a02SMartin Diehl PetscBool isascii; 307964e6c443SBarry Smith PetscViewerFormat format; 3080e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 3081f6c57405SHong Zhang 3082f6c57405SHong Zhang 
PetscFunctionBegin; 308364e6c443SBarry Smith /* check if matrix is mumps type */ 30843ba16761SJacob Faibussowitsch if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 308564e6c443SBarry Smith 30869f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 30879f196a02SMartin Diehl if (isascii) { 30889566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 30891511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 30909566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 30911511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 30929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 30939566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 30949566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 30959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 30969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 30979566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 30989566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 30999566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 31009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 31019566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 31029566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 31039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 3104a5e57a09SHong Zhang if (mumps->id.ICNTL(11) > 0) { 3105cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)ID_RINFOG_GET(mumps->id, 4))); 3106cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)ID_RINFOG_GET(mumps->id, 5))); 3107cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)ID_RINFOG_GET(mumps->id, 6))); 3108cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8))); 3109cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", (double)ID_RINFOG_GET(mumps->id, 9))); 3110cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11))); 3111f6c57405SHong Zhang } 31129566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 31139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 31149566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 311545e3843bSPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) 
(compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 3116f6c57405SHong Zhang /* ICNTL(16-17) are not displayed */ 31179566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 31189566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 31199566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 31209566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 31219566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 31229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 3123c0165424SHong Zhang 31249566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 31259566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 31269566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 31279566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 31289566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 31299566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 313042179a6aSHong Zhang 31319566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 31329566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 31339566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 31349566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 31359566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 313650ea2040Saszaboa PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37))); 31379566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 3138c92b4f89SPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48))); 313991b026caSPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56))); 3140146931dbSPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 3141f6c57405SHong Zhang 3142cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 1))); 3143cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2))); 3144cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 3))); 
3145cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)ID_CNTL_GET(mumps->id, 4))); 3146cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)ID_CNTL_GET(mumps->id, 5))); 3147cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)ID_CNTL_GET(mumps->id, 7))); 3148f6c57405SHong Zhang 3149a5b23f4aSJose E. Roman /* information local to each processor */ 31509566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 31519566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 3152cf053153SJunchao Zhang PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1))); 31539566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 31549566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 3155cf053153SJunchao Zhang PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2))); 31569566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 31579566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 3158cf053153SJunchao Zhang PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3))); 31599566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 3160f6c57405SHong Zhang 31619566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 31629566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 31639566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 3164f6c57405SHong Zhang 31659566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 31669566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 31679566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 3168f6c57405SHong Zhang 31699566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 31709566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 31719566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 3172b34f08ffSHong Zhang 3173a0e18203SThibaut Appel if (mumps->ninfo && mumps->ninfo <= 80) { 3174b34f08ffSHong Zhang PetscInt i; 3175b34f08ffSHong Zhang for (i = 0; i < mumps->ninfo; i++) { 31769566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 31779566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 31789566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 3179b34f08ffSHong Zhang } 3180b34f08ffSHong Zhang } 31819566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 31821511cd71SPierre Jolivet } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 3183f6c57405SHong Zhang 31841511cd71SPierre Jolivet if (mumps->myid == 0) { /* information from the host */ 3185cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1))); 3186cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2))); 3187cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3))); 3188cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34))); 3189f6c57405SHong Zhang 31909566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 31919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 31929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 31939566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 31949566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 31959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): 
%d\n", mumps->id.INFOG(8))); 31969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 31979566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 31989566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 31999566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 32009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 32019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 32029566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 32039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 32049566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 32059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 32069566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 32079566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 32089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 32099566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 32109566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 32119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 32129566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 32139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 32149566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 32159566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 32169566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after 
analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 32179566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 32189566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 32199566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 32209566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 32219566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 32229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 32239566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 3224f6c57405SHong Zhang } 3225f6c57405SHong Zhang } 3226cb828f0fSHong Zhang } 32273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3228f6c57405SHong Zhang } 3229f6c57405SHong Zhang 3230d2a308c1SPierre Jolivet static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info) 3231d71ae5a4SJacob Faibussowitsch { 3232e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 
323335bd34faSBarry Smith 323435bd34faSBarry Smith PetscFunctionBegin; 323535bd34faSBarry Smith info->block_size = 1.0; 323664412097SPierre Jolivet /* a negative INFOG(20) is treated as -INFOG(20) million entries; scale in floating point so the product cannot overflow the MUMPS integer type */ info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000.0 * mumps->id.INFOG(20); 323764412097SPierre Jolivet info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000.0 * mumps->id.INFOG(20); 323835bd34faSBarry Smith info->nz_unneeded = 0.0; 323935bd34faSBarry Smith info->assemblies = 0.0; 324035bd34faSBarry Smith info->mallocs = 0.0; 324135bd34faSBarry Smith info->memory = 0.0; 324235bd34faSBarry Smith info->fill_ratio_given = 0; 324335bd34faSBarry Smith info->fill_ratio_needed = 0; 324435bd34faSBarry Smith info->factor_mallocs = 0; 32453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 324635bd34faSBarry Smith } 324735bd34faSBarry Smith 324866976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 3249d71ae5a4SJacob Faibussowitsch { 3250e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3251a3d589ffSStefano Zampini const PetscScalar *arr; 32528e7ba810SStefano Zampini const PetscInt *idxs; 32538e7ba810SStefano Zampini PetscInt size, i; 32546444a565SStefano Zampini 32556444a565SStefano Zampini PetscFunctionBegin; 32569566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is, &size)); 3257b3cb21ddSStefano Zampini /* Schur complement matrix */ 32589566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F->schur)); 32599566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 32609566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 3261cf053153SJunchao Zhang // don't allocate mumps->id.schur[] now as its precision is yet to know 32626497c311SBarry Smith PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur)); 32636497c311SBarry Smith PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld)); 32649566063dSJacob Faibussowitsch 
PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 326548a46eb9SPierre Jolivet if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 3266b3cb21ddSStefano Zampini 3267b3cb21ddSStefano Zampini /* MUMPS expects Fortran style indices */ 32689566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.listvar_schur)); 32699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 32709566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is, &idxs)); 3271f4f49eeaSPierre Jolivet for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i])); 32729566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is, &idxs)); 327359ac8732SStefano Zampini /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 3274cf053153SJunchao Zhang if (mumps->id.icntl) mumps->id.ICNTL(26) = -1; 3275cf053153SJunchao Zhang else mumps->ICNTL26 = -1; 32763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 32776444a565SStefano Zampini } 327859ac8732SStefano Zampini 327966976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 3280d71ae5a4SJacob Faibussowitsch { 32816444a565SStefano Zampini Mat St; 3282e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 32836444a565SStefano Zampini PetscScalar *array; 3284cf053153SJunchao Zhang PetscInt i, j, N = mumps->id.size_schur; 32856444a565SStefano Zampini 32866444a565SStefano Zampini PetscFunctionBegin; 32879261f6e4SBarry Smith PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! 
Call MatFactorSetSchurIS() to enable it"); 32889566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 32899566063dSJacob Faibussowitsch PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 32909566063dSJacob Faibussowitsch PetscCall(MatSetType(St, MATDENSE)); 32919566063dSJacob Faibussowitsch PetscCall(MatSetUp(St)); 32929566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(St, &array)); 329359ac8732SStefano Zampini if (!mumps->sym) { /* MUMPS always return a full matrix */ 32946444a565SStefano Zampini if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 32956444a565SStefano Zampini for (i = 0; i < N; i++) { 3296cf053153SJunchao Zhang for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 32976444a565SStefano Zampini } 32986444a565SStefano Zampini } else { /* stored by columns */ 3299cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array)); 33006444a565SStefano Zampini } 33016444a565SStefano Zampini } else { /* either full or lower-triangular (not packed) */ 33026444a565SStefano Zampini if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 33036444a565SStefano Zampini for (i = 0; i < N; i++) { 3304cf053153SJunchao Zhang for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 33056444a565SStefano Zampini } 33066444a565SStefano Zampini } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 3307cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array)); 33086444a565SStefano Zampini } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 33096444a565SStefano Zampini for (i = 0; i < N; i++) { 3310cf053153SJunchao Zhang for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 33116444a565SStefano Zampini } 33126444a565SStefano Zampini 
} 33136444a565SStefano Zampini } 33149566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(St, &array)); 33156444a565SStefano Zampini *S = St; 33163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 33176444a565SStefano Zampini } 33186444a565SStefano Zampini 331966976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 3320d71ae5a4SJacob Faibussowitsch { 3321e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 33225ccb76cbSHong Zhang 33235ccb76cbSHong Zhang PetscFunctionBegin; 3324413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 33256497c311SBarry Smith PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 33269371c9d4SSatish Balay for (i = 0; i < nICNTL_pre; ++i) 33279371c9d4SSatish Balay if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? 
*/ 3328413bcc21SPierre Jolivet if (i == nICNTL_pre) { /* not already cached */ 3329413bcc21SPierre Jolivet if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 3330413bcc21SPierre Jolivet else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 3331413bcc21SPierre Jolivet mumps->ICNTL_pre[0]++; 3332413bcc21SPierre Jolivet } 33336497c311SBarry Smith mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl; 3334413bcc21SPierre Jolivet PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 3335413bcc21SPierre Jolivet } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 33363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 33375ccb76cbSHong Zhang } 33385ccb76cbSHong Zhang 333966976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 3340d71ae5a4SJacob Faibussowitsch { 3341e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3342bc6112feSHong Zhang 3343bc6112feSHong Zhang PetscFunctionBegin; 334436df9881Sjeremy theler if (mumps->id.job == JOB_NULL) { 334536df9881Sjeremy theler PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 334636df9881Sjeremy theler *ival = 0; 334736df9881Sjeremy theler for (i = 0; i < nICNTL_pre; ++i) { 334836df9881Sjeremy theler if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 334936df9881Sjeremy theler } 335036df9881Sjeremy theler } else *ival = mumps->id.ICNTL(icntl); 33513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3352bc6112feSHong Zhang } 3353bc6112feSHong Zhang 33545ccb76cbSHong Zhang /*@ 33551d27aa22SBarry Smith MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 33565ccb76cbSHong Zhang 3357c3339decSBarry Smith Logically Collective 33585ccb76cbSHong Zhang 33595ccb76cbSHong Zhang Input Parameters: 33600b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 336179578405SBarry Smith . icntl - index of MUMPS parameter array `ICNTL()` 336279578405SBarry Smith - ival - value of MUMPS `ICNTL(icntl)` 33635ccb76cbSHong Zhang 33643c7db156SBarry Smith Options Database Key: 336579578405SBarry Smith . 
-mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival` 33665ccb76cbSHong Zhang 33675ccb76cbSHong Zhang Level: beginner 33685ccb76cbSHong Zhang 336979578405SBarry Smith Note: 337079578405SBarry Smith Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 337179578405SBarry Smith 33721cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 33735ccb76cbSHong Zhang @*/ 3374d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 3375d71ae5a4SJacob Faibussowitsch { 33765ccb76cbSHong Zhang PetscFunctionBegin; 33772989dfd4SHong Zhang PetscValidType(F, 1); 337828b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 33795ccb76cbSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 33805ccb76cbSHong Zhang PetscValidLogicalCollectiveInt(F, ival, 3); 338191b026caSPierre Jolivet PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 3382cac4c232SBarry Smith PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 33833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 33845ccb76cbSHong Zhang } 33855ccb76cbSHong Zhang 3386a21f80fcSHong Zhang /*@ 33871d27aa22SBarry Smith MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 3388a21f80fcSHong Zhang 3389c3339decSBarry Smith Logically Collective 3390a21f80fcSHong Zhang 3391a21f80fcSHong Zhang Input Parameters: 33920b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or 
`MAT_FACTOR_CHOLESKY` 3393a21f80fcSHong Zhang - icntl - index of MUMPS parameter array ICNTL() 3394a21f80fcSHong Zhang 3395a21f80fcSHong Zhang Output Parameter: 3396a21f80fcSHong Zhang . ival - value of MUMPS ICNTL(icntl) 3397a21f80fcSHong Zhang 3398a21f80fcSHong Zhang Level: beginner 3399a21f80fcSHong Zhang 34001cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3401a21f80fcSHong Zhang @*/ 3402d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 3403d71ae5a4SJacob Faibussowitsch { 3404bc6112feSHong Zhang PetscFunctionBegin; 34052989dfd4SHong Zhang PetscValidType(F, 1); 340628b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3407bc6112feSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 34084f572ea9SToby Isaac PetscAssertPointer(ival, 3); 3409c92b4f89SPierre Jolivet /* accept the same indices as MatMumpsSetIcntl() so that every ICNTL that can be set, including ICNTL(56), can also be read back */ PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 3410cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 34113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3412bc6112feSHong Zhang } 3413bc6112feSHong Zhang 341466976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 3415d71ae5a4SJacob Faibussowitsch { 3416e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 34178928b65cSHong Zhang 34188928b65cSHong Zhang PetscFunctionBegin; 3419413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { 3420413bcc21SPierre Jolivet PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 34219371c9d4SSatish Balay for (i = 0; i < nCNTL_pre; ++i) 34229371c9d4SSatish Balay if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 3423413bcc21SPierre Jolivet if (i == nCNTL_pre) { 3424413bcc21SPierre Jolivet if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 3425413bcc21SPierre Jolivet else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 3426413bcc21SPierre Jolivet mumps->CNTL_pre[0]++; 3427413bcc21SPierre Jolivet } 3428413bcc21SPierre Jolivet mumps->CNTL_pre[1 + 2 * i] = icntl; 3429413bcc21SPierre Jolivet mumps->CNTL_pre[2 + 2 * i] = val; 3430cf053153SJunchao Zhang } else ID_CNTL_SET(mumps->id, icntl, val); 34313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34328928b65cSHong Zhang } 34338928b65cSHong Zhang 343466976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 3435d71ae5a4SJacob Faibussowitsch { 3436e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3437bc6112feSHong Zhang 3438bc6112feSHong Zhang PetscFunctionBegin; 343936df9881Sjeremy theler if (mumps->id.job == JOB_NULL) { 344036df9881Sjeremy theler PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 344136df9881Sjeremy theler *val = 0.0; 344236df9881Sjeremy theler for (i = 0; i < nCNTL_pre; ++i) { 344336df9881Sjeremy theler if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 344436df9881Sjeremy theler } 3445cf053153SJunchao Zhang } else *val = ID_CNTL_GET(mumps->id, icntl); 34463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3447bc6112feSHong Zhang } 3448bc6112feSHong Zhang 34498928b65cSHong Zhang /*@ 34501d27aa22SBarry Smith MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 34518928b65cSHong Zhang 3452c3339decSBarry Smith Logically Collective 34538928b65cSHong Zhang 34548928b65cSHong Zhang Input Parameters: 34550b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 345679578405SBarry Smith . icntl - index of MUMPS parameter array `CNTL()` 345779578405SBarry Smith - val - value of MUMPS `CNTL(icntl)` 34588928b65cSHong Zhang 34593c7db156SBarry Smith Options Database Key: 3460147403d9SBarry Smith . 
-mat_mumps_cntl_<icntl> <val> - change the option numbered `icntl` to `val` 34618928b65cSHong Zhang 34628928b65cSHong Zhang Level: beginner 34638928b65cSHong Zhang 346479578405SBarry Smith Note: 346579578405SBarry Smith Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 346679578405SBarry Smith 34671cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 34688928b65cSHong Zhang @*/ 3469d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 3470d71ae5a4SJacob Faibussowitsch { 34718928b65cSHong Zhang PetscFunctionBegin; 34722989dfd4SHong Zhang PetscValidType(F, 1); 347328b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 34748928b65cSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 3475bc6112feSHong Zhang PetscValidLogicalCollectiveReal(F, val, 3); 3476413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3477cac4c232SBarry Smith PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 34783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34798928b65cSHong Zhang } 34808928b65cSHong Zhang 3481a21f80fcSHong Zhang /*@ 34821d27aa22SBarry Smith MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3483a21f80fcSHong Zhang 3484c3339decSBarry Smith Logically Collective 3485a21f80fcSHong Zhang 3486a21f80fcSHong Zhang Input Parameters: 34870b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3488a21f80fcSHong Zhang - icntl - index of MUMPS 
parameter array CNTL() 3489a21f80fcSHong Zhang 3490a21f80fcSHong Zhang Output Parameter: 3491a21f80fcSHong Zhang . val - value of MUMPS CNTL(icntl) 3492a21f80fcSHong Zhang 3493a21f80fcSHong Zhang Level: beginner 3494a21f80fcSHong Zhang 34951cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3496a21f80fcSHong Zhang @*/ 3497d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 3498d71ae5a4SJacob Faibussowitsch { 3499bc6112feSHong Zhang PetscFunctionBegin; 35002989dfd4SHong Zhang PetscValidType(F, 1); 350128b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3502bc6112feSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 35034f572ea9SToby Isaac PetscAssertPointer(val, 3); 3504413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3505cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 35063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3507bc6112feSHong Zhang } 3508bc6112feSHong Zhang 350966976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 3510d71ae5a4SJacob Faibussowitsch { 3511e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3512bc6112feSHong Zhang 3513bc6112feSHong Zhang PetscFunctionBegin; 3514bc6112feSHong Zhang *info = mumps->id.INFO(icntl); 35153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3516bc6112feSHong Zhang } 3517bc6112feSHong Zhang 351866976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 3519d71ae5a4SJacob 
Faibussowitsch { 3520e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3521bc6112feSHong Zhang 3522bc6112feSHong Zhang PetscFunctionBegin; 3523bc6112feSHong Zhang *infog = mumps->id.INFOG(icntl); 35243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3525bc6112feSHong Zhang } 3526bc6112feSHong Zhang 352766976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 3528d71ae5a4SJacob Faibussowitsch { 3529e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3530bc6112feSHong Zhang 3531bc6112feSHong Zhang PetscFunctionBegin; 3532cf053153SJunchao Zhang *rinfo = ID_RINFO_GET(mumps->id, icntl); 35333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3534bc6112feSHong Zhang } 3535bc6112feSHong Zhang 353666976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 3537d71ae5a4SJacob Faibussowitsch { 3538e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3539bc6112feSHong Zhang 3540bc6112feSHong Zhang PetscFunctionBegin; 3541cf053153SJunchao Zhang *rinfog = ID_RINFOG_GET(mumps->id, icntl); 35423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3543bc6112feSHong Zhang } 3544bc6112feSHong Zhang 354566976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 35465c0bae8cSAshish Patel { 35475c0bae8cSAshish Patel Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 35485c0bae8cSAshish Patel 35495c0bae8cSAshish Patel PetscFunctionBegin; 35505c0bae8cSAshish Patel PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 35515c0bae8cSAshish Patel *size = 0; 35525c0bae8cSAshish Patel *array = NULL; 35535c0bae8cSAshish Patel if (!mumps->myid) { 35545c0bae8cSAshish Patel *size = mumps->id.INFOG(28); 35555c0bae8cSAshish Patel PetscCall(PetscMalloc1(*size, array)); 
35565c0bae8cSAshish Patel for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 35575c0bae8cSAshish Patel } 35585c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 35595c0bae8cSAshish Patel } 35605c0bae8cSAshish Patel 356166976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 3562d71ae5a4SJacob Faibussowitsch { 35630e6b8875SHong Zhang Mat Bt = NULL, Btseq = NULL; 35640e6b8875SHong Zhang PetscBool flg; 3565bb599dfdSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3566bb599dfdSHong Zhang PetscScalar *aa; 3567f410b75aSHong Zhang PetscInt spnr, *ia, *ja, M, nrhs; 3568bb599dfdSHong Zhang 3569bb599dfdSHong Zhang PetscFunctionBegin; 35704f572ea9SToby Isaac PetscAssertPointer(spRHS, 2); 3571013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 357253587d93SPierre Jolivet PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 357353587d93SPierre Jolivet PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 35749566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(spRHS, &Bt)); 3575bb599dfdSHong Zhang 35769566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 30, 1)); 3577bb599dfdSHong Zhang 35782d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 35790e6b8875SHong Zhang Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 35800e6b8875SHong Zhang Btseq = b->A; 35810e6b8875SHong Zhang } else { 35820e6b8875SHong Zhang Btseq = Bt; 35830e6b8875SHong Zhang } 35840e6b8875SHong Zhang 35859566063dSJacob Faibussowitsch PetscCall(MatGetSize(spRHS, &M, &nrhs)); 35866497c311SBarry Smith mumps->id.nrhs = (PetscMUMPSInt)nrhs; 35876497c311SBarry Smith 
PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs)); 3588f410b75aSHong Zhang mumps->id.rhs = NULL; 3589f410b75aSHong Zhang 3590e3f2db6aSHong Zhang if (!mumps->myid) { 35919566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 35929566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 359328b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 35949566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3595cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 3596e3f2db6aSHong Zhang } else { 3597e3f2db6aSHong Zhang mumps->id.irhs_ptr = NULL; 3598e3f2db6aSHong Zhang mumps->id.irhs_sparse = NULL; 3599e3f2db6aSHong Zhang mumps->id.nz_rhs = 0; 3600cf053153SJunchao Zhang if (mumps->id.rhs_sparse_len) { 3601cf053153SJunchao Zhang PetscCall(PetscFree(mumps->id.rhs_sparse)); 3602cf053153SJunchao Zhang mumps->id.rhs_sparse_len = 0; 3603cf053153SJunchao Zhang } 3604e3f2db6aSHong Zhang } 3605bb599dfdSHong Zhang mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3606e3f2db6aSHong Zhang mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3607bb599dfdSHong Zhang 3608bb599dfdSHong Zhang /* solve phase */ 3609bb599dfdSHong Zhang mumps->id.job = JOB_SOLVE; 36103ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 36119261f6e4SBarry Smith PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 361214267174SHong Zhang 3613e3f2db6aSHong Zhang if (!mumps->myid) { 36149566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 36159566063dSJacob Faibussowitsch 
PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 361628b400f6SJacob Faibussowitsch /* flg here reports the outcome of MatRestoreRowIJ(), so the message names the restore step rather than the earlier get */ PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 3617e3f2db6aSHong Zhang } 36183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3619bb599dfdSHong Zhang } 3620bb599dfdSHong Zhang 3621bb599dfdSHong Zhang /*@ 36221d27aa22SBarry Smith MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc> 3623bb599dfdSHong Zhang 3624c3339decSBarry Smith Logically Collective 3625bb599dfdSHong Zhang 362620f4b53cSBarry Smith Input Parameter: 36270b4b7b1cSBarry Smith . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3628bb599dfdSHong Zhang 3629bb599dfdSHong Zhang Output Parameter: 363020f4b53cSBarry Smith . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3631bb599dfdSHong Zhang 3632bb599dfdSHong Zhang Level: beginner 3633bb599dfdSHong Zhang 36341cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3635bb599dfdSHong Zhang @*/ 3636d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3637d71ae5a4SJacob Faibussowitsch { 3638bb599dfdSHong Zhang PetscFunctionBegin; 3639bb599dfdSHong Zhang PetscValidType(F, 1); 364028b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3641cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 36423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3643bb599dfdSHong Zhang } 3644bb599dfdSHong Zhang 364566976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 
3646d71ae5a4SJacob Faibussowitsch { 36470e6b8875SHong Zhang Mat spRHS; 36480e6b8875SHong Zhang 36490e6b8875SHong Zhang PetscFunctionBegin; 36509566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(spRHST, &spRHS)); 36519566063dSJacob Faibussowitsch PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 36529566063dSJacob Faibussowitsch PetscCall(MatDestroy(&spRHS)); 36533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 36540e6b8875SHong Zhang } 36550e6b8875SHong Zhang 36560e6b8875SHong Zhang /*@ 36571d27aa22SBarry Smith MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc> 36580e6b8875SHong Zhang 3659c3339decSBarry Smith Logically Collective 36600e6b8875SHong Zhang 366120f4b53cSBarry Smith Input Parameter: 36620b4b7b1cSBarry Smith . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 36630e6b8875SHong Zhang 36640e6b8875SHong Zhang Output Parameter: 366520f4b53cSBarry Smith . 
spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 36660e6b8875SHong Zhang 36670e6b8875SHong Zhang Level: beginner 36680e6b8875SHong Zhang 36691cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 36700e6b8875SHong Zhang @*/ 3671d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3672d71ae5a4SJacob Faibussowitsch { 36730e6b8875SHong Zhang PetscBool flg; 36740e6b8875SHong Zhang 36750e6b8875SHong Zhang PetscFunctionBegin; 36760e6b8875SHong Zhang PetscValidType(F, 1); 367728b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 36789566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 367928b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3680cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 36813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 36820e6b8875SHong Zhang } 36830e6b8875SHong Zhang 368493d70b8aSPierre Jolivet static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 368593d70b8aSPierre Jolivet { 368693d70b8aSPierre Jolivet Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 368793d70b8aSPierre Jolivet 368893d70b8aSPierre Jolivet PetscFunctionBegin; 368993d70b8aSPierre Jolivet if (nblk) { 369093d70b8aSPierre Jolivet PetscAssertPointer(blkptr, 4); 369193d70b8aSPierre Jolivet PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk)); 369293d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkptr)); 369393d70b8aSPierre Jolivet PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr)); 369493d70b8aSPierre Jolivet for (PetscInt i = 0; i < nblk + 1; ++i) 
PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i)); 3695cf053153SJunchao Zhang // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15). 3696cf053153SJunchao Zhang // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS(). 3697cf053153SJunchao Zhang // mumps->id.ICNTL(15) = 1; 369893d70b8aSPierre Jolivet if (blkvar) { 369993d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkvar)); 370093d70b8aSPierre Jolivet PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar)); 370193d70b8aSPierre Jolivet for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i)); 370293d70b8aSPierre Jolivet } 370393d70b8aSPierre Jolivet } else { 370493d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkptr)); 370593d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkvar)); 3706cf053153SJunchao Zhang // mumps->id.ICNTL(15) = 0; 3707cf053153SJunchao Zhang mumps->id.nblk = 0; 370893d70b8aSPierre Jolivet } 370993d70b8aSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 371093d70b8aSPierre Jolivet } 371193d70b8aSPierre Jolivet 371293d70b8aSPierre Jolivet /*@ 371393d70b8aSPierre Jolivet MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1` 371493d70b8aSPierre Jolivet 371593d70b8aSPierre Jolivet Not collective, only relevant on the first process of the MPI communicator 371693d70b8aSPierre Jolivet 371793d70b8aSPierre Jolivet Input Parameters: 371893d70b8aSPierre Jolivet + F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 371993d70b8aSPierre Jolivet . nblk - the number of blocks 372093d70b8aSPierre Jolivet . 
blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk` 372193d70b8aSPierre Jolivet - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks 372293d70b8aSPierre Jolivet 372393d70b8aSPierre Jolivet Level: advanced 372493d70b8aSPierre Jolivet 372593d70b8aSPierre Jolivet .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()` 372693d70b8aSPierre Jolivet @*/ 372793d70b8aSPierre Jolivet PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 372893d70b8aSPierre Jolivet { 372993d70b8aSPierre Jolivet PetscFunctionBegin; 373093d70b8aSPierre Jolivet PetscValidType(F, 1); 373193d70b8aSPierre Jolivet PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 373293d70b8aSPierre Jolivet PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr)); 373393d70b8aSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 373493d70b8aSPierre Jolivet } 373593d70b8aSPierre Jolivet 3736a21f80fcSHong Zhang /*@ 37371d27aa22SBarry Smith MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc> 3738a21f80fcSHong Zhang 3739c3339decSBarry Smith Logically Collective 3740a21f80fcSHong Zhang 3741a21f80fcSHong Zhang Input Parameters: 37420b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3743a21f80fcSHong Zhang - icntl - index of MUMPS parameter array INFO() 3744a21f80fcSHong Zhang 3745a21f80fcSHong Zhang Output Parameter: 3746a21f80fcSHong Zhang . 
ival - value of MUMPS INFO(icntl) 3747a21f80fcSHong Zhang 3748a21f80fcSHong Zhang Level: beginner 3749a21f80fcSHong Zhang 37501cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3751a21f80fcSHong Zhang @*/ 3752d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3753d71ae5a4SJacob Faibussowitsch { 3754bc6112feSHong Zhang PetscFunctionBegin; 37552989dfd4SHong Zhang PetscValidType(F, 1); 375628b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 37574f572ea9SToby Isaac PetscAssertPointer(ival, 3); 3758cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 37593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3760bc6112feSHong Zhang } 3761bc6112feSHong Zhang 3762a21f80fcSHong Zhang /*@ 37631d27aa22SBarry Smith MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc> 3764a21f80fcSHong Zhang 3765c3339decSBarry Smith Logically Collective 3766a21f80fcSHong Zhang 3767a21f80fcSHong Zhang Input Parameters: 37680b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3769a21f80fcSHong Zhang - icntl - index of MUMPS parameter array INFOG() 3770a21f80fcSHong Zhang 3771a21f80fcSHong Zhang Output Parameter: 3772a21f80fcSHong Zhang . 
ival - value of MUMPS INFOG(icntl) 3773a21f80fcSHong Zhang 3774a21f80fcSHong Zhang Level: beginner 3775a21f80fcSHong Zhang 37761cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3777a21f80fcSHong Zhang @*/ 3778d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3779d71ae5a4SJacob Faibussowitsch { 3780bc6112feSHong Zhang PetscFunctionBegin; 37812989dfd4SHong Zhang PetscValidType(F, 1); 378228b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 37834f572ea9SToby Isaac PetscAssertPointer(ival, 3); 3784cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 37853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3786bc6112feSHong Zhang } 3787bc6112feSHong Zhang 3788a21f80fcSHong Zhang /*@ 37891d27aa22SBarry Smith MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc> 3790a21f80fcSHong Zhang 3791c3339decSBarry Smith Logically Collective 3792a21f80fcSHong Zhang 3793a21f80fcSHong Zhang Input Parameters: 37940b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3795a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFO() 3796a21f80fcSHong Zhang 3797a21f80fcSHong Zhang Output Parameter: 3798a21f80fcSHong Zhang . 
val - value of MUMPS RINFO(icntl) 3799a21f80fcSHong Zhang 3800a21f80fcSHong Zhang Level: beginner 3801a21f80fcSHong Zhang 38021cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3803a21f80fcSHong Zhang @*/ 3804d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3805d71ae5a4SJacob Faibussowitsch { 3806bc6112feSHong Zhang PetscFunctionBegin; 38072989dfd4SHong Zhang PetscValidType(F, 1); 380828b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 38094f572ea9SToby Isaac PetscAssertPointer(val, 3); 3810cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 38113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3812bc6112feSHong Zhang } 3813bc6112feSHong Zhang 3814a21f80fcSHong Zhang /*@ 38151d27aa22SBarry Smith MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc> 3816a21f80fcSHong Zhang 3817c3339decSBarry Smith Logically Collective 3818a21f80fcSHong Zhang 3819a21f80fcSHong Zhang Input Parameters: 38200b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3821a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFOG() 3822a21f80fcSHong Zhang 3823a21f80fcSHong Zhang Output Parameter: 3824a21f80fcSHong Zhang . 
val - value of MUMPS RINFOG(icntl) 3825a21f80fcSHong Zhang 3826a21f80fcSHong Zhang Level: beginner 3827a21f80fcSHong Zhang 38281cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3829a21f80fcSHong Zhang @*/ 3830d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3831d71ae5a4SJacob Faibussowitsch { 3832bc6112feSHong Zhang PetscFunctionBegin; 38332989dfd4SHong Zhang PetscValidType(F, 1); 383428b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 38354f572ea9SToby Isaac PetscAssertPointer(val, 3); 3836cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 38373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3838bc6112feSHong Zhang } 3839bc6112feSHong Zhang 38405c0bae8cSAshish Patel /*@ 38411d27aa22SBarry Smith MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc> 38425c0bae8cSAshish Patel 38435c0bae8cSAshish Patel Logically Collective 38445c0bae8cSAshish Patel 38455c0bae8cSAshish Patel Input Parameter: 38460b4b7b1cSBarry Smith . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 38475c0bae8cSAshish Patel 38485c0bae8cSAshish Patel Output Parameters: 38490b4b7b1cSBarry Smith + size - local size of the array. The size of the array is non-zero only on MPI rank 0 38505c0bae8cSAshish Patel - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 38515c0bae8cSAshish Patel for freeing this array. 
38525c0bae8cSAshish Patel 38535c0bae8cSAshish Patel Level: beginner 38545c0bae8cSAshish Patel 38551cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 38565c0bae8cSAshish Patel @*/ 38575c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 38585c0bae8cSAshish Patel { 38595c0bae8cSAshish Patel PetscFunctionBegin; 38605c0bae8cSAshish Patel PetscValidType(F, 1); 38615c0bae8cSAshish Patel PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 38624f572ea9SToby Isaac PetscAssertPointer(size, 2); 38634f572ea9SToby Isaac PetscAssertPointer(array, 3); 38645c0bae8cSAshish Patel PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 38655c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 38665c0bae8cSAshish Patel } 38675c0bae8cSAshish Patel 386824b6179bSKris Buschelman /*MC 38692692d6eeSBarry Smith MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 38700b4b7b1cSBarry Smith MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc> 387124b6179bSKris Buschelman 387211a5261eSBarry Smith Works with `MATAIJ` and `MATSBAIJ` matrices 387324b6179bSKris Buschelman 3874c2b89b5dSBarry Smith Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3875c2b89b5dSBarry Smith 38762ef1f0ffSBarry Smith Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 38772ef1f0ffSBarry Smith See details below. 
3878217d3b1eSJunchao Zhang 38792ef1f0ffSBarry Smith Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3880c2b89b5dSBarry Smith 388124b6179bSKris Buschelman Options Database Keys: 38824422a9fcSPatrick Sanan + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 38834422a9fcSPatrick Sanan . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 38844422a9fcSPatrick Sanan . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 38854422a9fcSPatrick Sanan . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 38864422a9fcSPatrick Sanan . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3887b53c1a7fSBarry Smith . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3888b53c1a7fSBarry Smith Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 38894422a9fcSPatrick Sanan . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 38904422a9fcSPatrick Sanan . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 38914422a9fcSPatrick Sanan . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 38924422a9fcSPatrick Sanan . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 38934422a9fcSPatrick Sanan . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 38944422a9fcSPatrick Sanan . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 389545e3843bSPierre Jolivet . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 38964422a9fcSPatrick Sanan . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 389725aac85cSJunchao Zhang . 
-mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
. -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
. -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
. -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
. -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
. -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
. -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
. -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
. -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
. -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
. -mat_mumps_icntl_33 - ICNTL(33): compute determinant
. -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
. -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
. -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
. -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
. -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
. -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
. 
-mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 39154422a9fcSPatrick Sanan . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 39164422a9fcSPatrick Sanan . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 39174422a9fcSPatrick Sanan . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3918217d3b1eSJunchao Zhang . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3919a0e18203SThibaut Appel . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3920217d3b1eSJunchao Zhang - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3921217d3b1eSJunchao Zhang Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 392224b6179bSKris Buschelman 392324b6179bSKris Buschelman Level: beginner 392424b6179bSKris Buschelman 392595452b02SPatrick Sanan Notes: 39261d27aa22SBarry Smith MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will 39272ef1f0ffSBarry Smith error if the matrix is Hermitian. 392838548759SBarry Smith 392926cc229bSBarry Smith When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 393026cc229bSBarry Smith `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 

  When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
  the failure with
.vb
          KSPGetPC(ksp,&pc);
          PCFactorGetMatrix(pc,&mat);
          MatMumpsGetInfo(mat,....);
          MatMumpsGetInfog(mat,....); etc.
.ve
  Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.

  MUMPS provides 64-bit integer support in two build modes:
  full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
  requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS to be built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

  selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
  MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and
  columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
  integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

  Two modes to run MUMPS/PETSc with OpenMP
.vb
   Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
   threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
.ve

.vb
   -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
   if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
.ve

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
  (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
  (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
  libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
  (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).

  If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
  processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
  size m and create a communicator called omp_comm for each group.
Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
  are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We will free CPUs assigned to slaves (might be set
  by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
  In a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets.
  If you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
  MPI ranks to cores, then with `-mat_mumps_use_omp_threads 16`, a master rank (and threads it spawns) will use half of the cores in socket 0, and half
  of the cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
  problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding.
  For example, with the Slurm job scheduler, one can use `srun --cpu-bind=verbose -m block:block` to map consecutive MPI ranks to sockets and
  examine the mapping result.

  PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
  for example, `export OMP_PLACES=threads` and `export OMP_PROC_BIND=spread`. One does not need to export `OMP_NUM_THREADS=m` in job scripts as PETSc
  calls `omp_set_num_threads(m)` internally before calling MUMPS.
3986217d3b1eSJunchao Zhang 39871d27aa22SBarry Smith See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 3988217d3b1eSJunchao Zhang 398993d70b8aSPierre Jolivet .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()` 399024b6179bSKris Buschelman M*/ 399124b6179bSKris Buschelman 3992d2a308c1SPierre Jolivet static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 3993d71ae5a4SJacob Faibussowitsch { 399435bd34faSBarry Smith PetscFunctionBegin; 39952692d6eeSBarry Smith *type = MATSOLVERMUMPS; 39963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 399735bd34faSBarry Smith } 399835bd34faSBarry Smith 3999bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI AIJ matrices */ 4000d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 4001d71ae5a4SJacob Faibussowitsch { 40022877fffaSHong Zhang Mat B; 40032877fffaSHong Zhang Mat_MUMPS *mumps; 40044b9405b2SPierre Jolivet PetscBool isSeqAIJ, isDiag, isDense; 40052c7c0729SBarry Smith PetscMPIInt size; 40062877fffaSHong Zhang 40072877fffaSHong Zhang PetscFunctionBegin; 4008eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 400903e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 401003e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 401103e5aca4SStefano Zampini *F = NULL; 401203e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 401303e5aca4SStefano Zampini } 4014eb1ec7c1SStefano Zampini #endif 40152877fffaSHong Zhang /* Create the factorization matrix */ 40169566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 4017c3e1b152SPierre Jolivet PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 40184b9405b2SPierre Jolivet PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 40199566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 40209566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4021d2a308c1SPierre Jolivet PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 40229566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 40232877fffaSHong Zhang 40244dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 40252205254eSKarl Rupp 40262877fffaSHong Zhang B->ops->view = MatView_MUMPS; 402735bd34faSBarry Smith B->ops->getinfo = MatGetInfo_MUMPS; 40282205254eSKarl Rupp 40299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 40309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 40319566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 40329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 40339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 40349566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 40359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 40369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 40379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 40389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 40399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 40405c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 40419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 40429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 404393d70b8aSPierre Jolivet PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 40446444a565SStefano Zampini 4045450b117fSShri Abhyankar if (ftype == MAT_FACTOR_LU) { 4046450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4047d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 4048bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 4049c3e1b152SPierre Jolivet else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 40504b9405b2SPierre Jolivet else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4051bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 40529566063dSJacob Faibussowitsch 
PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4053746480a1SHong Zhang mumps->sym = 0; 4054dcd589f8SShri Abhyankar } else { 405567877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4056450b117fSShri Abhyankar B->factortype = MAT_FACTOR_CHOLESKY; 4057bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 4058c3e1b152SPierre Jolivet else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 40594b9405b2SPierre Jolivet else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4060bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 40619566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 406259ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 406359ac8732SStefano Zampini mumps->sym = 2; 406459ac8732SStefano Zampini #else 4065b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 40666fdc2a6dSBarry Smith else mumps->sym = 2; 406759ac8732SStefano Zampini #endif 4068450b117fSShri Abhyankar } 40692877fffaSHong Zhang 407000c67f3bSHong Zhang /* set solvertype */ 40719566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 40729566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 40739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 40742c7c0729SBarry Smith if (size == 1) { 40754ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4076f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 40772c7c0729SBarry Smith } 40782877fffaSHong Zhang B->ops->destroy = MatDestroy_MUMPS; 4079e69c285eSBarry Smith B->data = (void *)mumps; 40802205254eSKarl Rupp 40812877fffaSHong Zhang *F = B; 4082413bcc21SPierre 
Jolivet mumps->id.job = JOB_NULL; 4083413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 4084413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 4085d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 40863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 40872877fffaSHong Zhang } 40882877fffaSHong Zhang 4089bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI SBAIJ matrices */ 4090d2a308c1SPierre Jolivet static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 4091d71ae5a4SJacob Faibussowitsch { 40922877fffaSHong Zhang Mat B; 40932877fffaSHong Zhang Mat_MUMPS *mumps; 4094ace3abfcSBarry Smith PetscBool isSeqSBAIJ; 40952c7c0729SBarry Smith PetscMPIInt size; 40962877fffaSHong Zhang 40972877fffaSHong Zhang PetscFunctionBegin; 4098eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 409903e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 410003e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 410103e5aca4SStefano Zampini *F = NULL; 410203e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 410303e5aca4SStefano Zampini } 4104eb1ec7c1SStefano Zampini #endif 41059566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 41069566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4107d2a308c1SPierre Jolivet PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 41089566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 4109e69c285eSBarry Smith 41104dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 41119566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 4112bccb9932SShri Abhyankar if (isSeqSBAIJ) { 411316ebf90aSShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 4114dcd589f8SShri Abhyankar } else { 4115bccb9932SShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 4116bccb9932SShri Abhyankar } 4117bccb9932SShri Abhyankar 411867877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4119bccb9932SShri Abhyankar B->ops->view = MatView_MUMPS; 4120722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 41212205254eSKarl Rupp 41229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 41239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 41249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 41259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 41269566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 41279566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 41289566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 41299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 41309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 41319566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 41329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 41335c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 41349566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 41359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 413693d70b8aSPierre Jolivet PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 41372205254eSKarl Rupp 4138f4762488SHong Zhang B->factortype = MAT_FACTOR_CHOLESKY; 413959ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 414059ac8732SStefano Zampini mumps->sym = 2; 414159ac8732SStefano Zampini #else 4142b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 41436fdc2a6dSBarry Smith else mumps->sym = 2; 414459ac8732SStefano Zampini #endif 4145a214ac2aSShri Abhyankar 414600c67f3bSHong Zhang /* set solvertype */ 41479566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 
41489566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 41499566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 41502c7c0729SBarry Smith if (size == 1) { 41514ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4152f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 41532c7c0729SBarry Smith } 41549566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4155f3c0ef26SHong Zhang B->ops->destroy = MatDestroy_MUMPS; 4156e69c285eSBarry Smith B->data = (void *)mumps; 41572205254eSKarl Rupp 41582877fffaSHong Zhang *F = B; 4159413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 4160413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 4161413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 4162d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 41633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 41642877fffaSHong Zhang } 416597969023SHong Zhang 4166d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 4167d71ae5a4SJacob Faibussowitsch { 416867877ebaSShri Abhyankar Mat B; 416967877ebaSShri Abhyankar Mat_MUMPS *mumps; 4170ace3abfcSBarry Smith PetscBool isSeqBAIJ; 41712c7c0729SBarry Smith PetscMPIInt size; 417267877ebaSShri Abhyankar 417367877ebaSShri Abhyankar PetscFunctionBegin; 417467877ebaSShri Abhyankar /* Create the factorization matrix */ 41759566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 41769566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 41779566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4178d2a308c1SPierre Jolivet PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 
41799566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 4180450b117fSShri Abhyankar 41814dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 4182966bd95aSPierre Jolivet PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 4183450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 4184450b117fSShri Abhyankar B->factortype = MAT_FACTOR_LU; 4185bccb9932SShri Abhyankar if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 4186bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 4187746480a1SHong Zhang mumps->sym = 0; 41889566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4189bccb9932SShri Abhyankar 4190450b117fSShri Abhyankar B->ops->view = MatView_MUMPS; 4191722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 41922205254eSKarl Rupp 41939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 41949566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 41959566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 41969566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 41979566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 41989566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 41999566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", 
MatMumpsGetCntl_MUMPS)); 42009566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 42019566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 42029566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 42039566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 42045c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 42059566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 42069566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 420793d70b8aSPierre Jolivet PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4208450b117fSShri Abhyankar 420900c67f3bSHong Zhang /* set solvertype */ 42109566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 42119566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 42129566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 42132c7c0729SBarry Smith if (size == 1) { 42144ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4215f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 42162c7c0729SBarry Smith } 42177ee00b23SStefano Zampini B->ops->destroy = MatDestroy_MUMPS; 42187ee00b23SStefano Zampini B->data = (void *)mumps; 42197ee00b23SStefano Zampini 42207ee00b23SStefano Zampini *F = B; 4221413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 4222413bcc21SPierre Jolivet 
mumps->ICNTL_pre = NULL; 4223413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 4224d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 42253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 42267ee00b23SStefano Zampini } 42277ee00b23SStefano Zampini 42287ee00b23SStefano Zampini /* MatGetFactor for Seq and MPI SELL matrices */ 4229d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 4230d71ae5a4SJacob Faibussowitsch { 42317ee00b23SStefano Zampini Mat B; 42327ee00b23SStefano Zampini Mat_MUMPS *mumps; 42337ee00b23SStefano Zampini PetscBool isSeqSELL; 42342c7c0729SBarry Smith PetscMPIInt size; 42357ee00b23SStefano Zampini 42367ee00b23SStefano Zampini PetscFunctionBegin; 42377ee00b23SStefano Zampini /* Create the factorization matrix */ 42389566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 42399566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 42409566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4241d2a308c1SPierre Jolivet PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 42429566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 42437ee00b23SStefano Zampini 42444dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 42457ee00b23SStefano Zampini 42467ee00b23SStefano Zampini B->ops->view = MatView_MUMPS; 42477ee00b23SStefano Zampini B->ops->getinfo = MatGetInfo_MUMPS; 42487ee00b23SStefano Zampini 42499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 42509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 42519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", 
MatFactorCreateSchurComplement_MUMPS)); 42529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 42539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 42549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 42559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 42569566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 42579566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 42589566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 42599566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 42605c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 42617ee00b23SStefano Zampini 4262966bd95aSPierre Jolivet PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 42637ee00b23SStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 42647ee00b23SStefano Zampini B->factortype = MAT_FACTOR_LU; 4265966bd95aSPierre Jolivet PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4266966bd95aSPierre Jolivet mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 42677ee00b23SStefano Zampini mumps->sym = 0; 42689566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 42697ee00b23SStefano Zampini 42707ee00b23SStefano 
Zampini /* set solvertype */ 42719566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 42729566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 42739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 42742c7c0729SBarry Smith if (size == 1) { 42754ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4276f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 42772c7c0729SBarry Smith } 4278450b117fSShri Abhyankar B->ops->destroy = MatDestroy_MUMPS; 4279e69c285eSBarry Smith B->data = (void *)mumps; 42802205254eSKarl Rupp 4281450b117fSShri Abhyankar *F = B; 4282413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 4283413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 4284413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 4285d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 42863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4287450b117fSShri Abhyankar } 428842c9c57cSBarry Smith 42899d0448ceSStefano Zampini /* MatGetFactor for MATNEST matrices */ 42909d0448ceSStefano Zampini static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 42919d0448ceSStefano Zampini { 42929d0448ceSStefano Zampini Mat B, **mats; 42939d0448ceSStefano Zampini Mat_MUMPS *mumps; 42949d0448ceSStefano Zampini PetscInt nr, nc; 42959d0448ceSStefano Zampini PetscMPIInt size; 429603e5aca4SStefano Zampini PetscBool flg = PETSC_TRUE; 42979d0448ceSStefano Zampini 42989d0448ceSStefano Zampini PetscFunctionBegin; 42999d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX) 430003e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 430103e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 430203e5aca4SStefano Zampini *F = NULL; 430303e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 430403e5aca4SStefano Zampini } 43059d0448ceSStefano Zampini #endif 43069d0448ceSStefano Zampini 430703e5aca4SStefano Zampini /* Return if some condition is not satisfied */ 430803e5aca4SStefano Zampini *F = NULL; 43099d0448ceSStefano Zampini PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 43109d0448ceSStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY) { 43119d0448ceSStefano Zampini IS *rows, *cols; 43129d0448ceSStefano Zampini PetscInt *m, *M; 43139d0448ceSStefano Zampini 43149d0448ceSStefano Zampini PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 43159d0448ceSStefano Zampini PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 43169d0448ceSStefano Zampini PetscCall(MatNestGetISs(A, rows, cols)); 43179d0448ceSStefano Zampini for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 431803e5aca4SStefano Zampini if (!flg) { 431903e5aca4SStefano Zampini PetscCall(PetscFree2(rows, cols)); 432003e5aca4SStefano Zampini PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. 
Use MAT_FACTOR_LU.\n")); 432103e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 432203e5aca4SStefano Zampini } 43239d0448ceSStefano Zampini PetscCall(PetscMalloc2(nr, &m, nr, &M)); 43249d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 43259d0448ceSStefano Zampini for (PetscInt r = 0; flg && r < nr; r++) 43269d0448ceSStefano Zampini for (PetscInt k = r + 1; flg && k < nr; k++) 43279d0448ceSStefano Zampini if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 43289d0448ceSStefano Zampini PetscCall(PetscFree2(m, M)); 43299d0448ceSStefano Zampini PetscCall(PetscFree2(rows, cols)); 433003e5aca4SStefano Zampini if (!flg) { 433103e5aca4SStefano Zampini PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 433203e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 433303e5aca4SStefano Zampini } 43349d0448ceSStefano Zampini } 43359d0448ceSStefano Zampini 43369d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 43379d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 43389d0448ceSStefano Zampini Mat sub = mats[r][c]; 433953587d93SPierre Jolivet PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 43409d0448ceSStefano Zampini 43419d0448ceSStefano Zampini if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 434253587d93SPierre Jolivet PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 43439d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 43449d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 43459d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 43469d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 
43479d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 43489d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 4349c3e1b152SPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 43504b9405b2SPierre Jolivet PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 43519d0448ceSStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY) { 4352dcab004fSPierre Jolivet if (r == c) { 43534b9405b2SPierre Jolivet if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) { 435440afc089SBarry Smith PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 435503e5aca4SStefano Zampini flg = PETSC_FALSE; 4356dcab004fSPierre Jolivet } 43574b9405b2SPierre Jolivet } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 435840afc089SBarry Smith PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 435903e5aca4SStefano Zampini flg = PETSC_FALSE; 436003e5aca4SStefano Zampini } 43614b9405b2SPierre Jolivet } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 43629afb9c56SPierre Jolivet PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 436303e5aca4SStefano Zampini flg = PETSC_FALSE; 43649d0448ceSStefano Zampini } 43659d0448ceSStefano Zampini } 436603e5aca4SStefano Zampini } 436703e5aca4SStefano Zampini if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 43689d0448ceSStefano Zampini 43699d0448ceSStefano Zampini /* Create the factorization matrix */ 43709d0448ceSStefano Zampini PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 43719d0448ceSStefano Zampini PetscCall(MatSetSizes(B, 
A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 43729d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 43739d0448ceSStefano Zampini PetscCall(MatSetUp(B)); 43749d0448ceSStefano Zampini 43759d0448ceSStefano Zampini PetscCall(PetscNew(&mumps)); 43769d0448ceSStefano Zampini 43779d0448ceSStefano Zampini B->ops->view = MatView_MUMPS; 43789d0448ceSStefano Zampini B->ops->getinfo = MatGetInfo_MUMPS; 43799d0448ceSStefano Zampini 43809d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 43819d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 43829d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 43839d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 43849d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 43859d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 43869d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 43879d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 43889d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 43899d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 43909d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 43919d0448ceSStefano Zampini 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 43929d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 43939d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 439493d70b8aSPierre Jolivet PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 43959d0448ceSStefano Zampini 43969d0448ceSStefano Zampini if (ftype == MAT_FACTOR_LU) { 43979d0448ceSStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 43989d0448ceSStefano Zampini B->factortype = MAT_FACTOR_LU; 43999d0448ceSStefano Zampini mumps->sym = 0; 44009d0448ceSStefano Zampini } else { 44019d0448ceSStefano Zampini B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 44029d0448ceSStefano Zampini B->factortype = MAT_FACTOR_CHOLESKY; 44039d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX) 44049d0448ceSStefano Zampini mumps->sym = 2; 44059d0448ceSStefano Zampini #else 44069d0448ceSStefano Zampini if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 44079d0448ceSStefano Zampini else mumps->sym = 2; 44089d0448ceSStefano Zampini #endif 44099d0448ceSStefano Zampini } 44109d0448ceSStefano Zampini mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 44119d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 44129d0448ceSStefano Zampini 44139d0448ceSStefano Zampini PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 44149d0448ceSStefano Zampini if (size == 1) { 44159d0448ceSStefano Zampini /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 44169d0448ceSStefano Zampini B->canuseordering = PETSC_TRUE; 44179d0448ceSStefano Zampini } 44189d0448ceSStefano Zampini 44199d0448ceSStefano 
Zampini /* set solvertype */ 44209d0448ceSStefano Zampini PetscCall(PetscFree(B->solvertype)); 44219d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 44229d0448ceSStefano Zampini B->ops->destroy = MatDestroy_MUMPS; 44239d0448ceSStefano Zampini B->data = (void *)mumps; 44249d0448ceSStefano Zampini 44259d0448ceSStefano Zampini *F = B; 44269d0448ceSStefano Zampini mumps->id.job = JOB_NULL; 44279d0448ceSStefano Zampini mumps->ICNTL_pre = NULL; 44289d0448ceSStefano Zampini mumps->CNTL_pre = NULL; 44299d0448ceSStefano Zampini mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 44309d0448ceSStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 44319d0448ceSStefano Zampini } 44329d0448ceSStefano Zampini 4433d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 4434d71ae5a4SJacob Faibussowitsch { 443542c9c57cSBarry Smith PetscFunctionBegin; 44369566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 44379566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 44389566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 44399566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 44409566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 44419566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 44429566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 44439566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, 
MatGetFactor_baij_mumps)); 44449566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 44459566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 44469566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 4447c3e1b152SPierre Jolivet PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4448c3e1b152SPierre Jolivet PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 44494b9405b2SPierre Jolivet PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 44504b9405b2SPierre Jolivet PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 44514b9405b2SPierre Jolivet PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 44524b9405b2SPierre Jolivet PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 44539d0448ceSStefano Zampini PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 44549d0448ceSStefano Zampini PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 44553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 445642c9c57cSBarry Smith } 4457