1397b6df1SKris Buschelman /* 2c2b5dc30SHong Zhang Provides an interface to the MUMPS sparse solver 3397b6df1SKris Buschelman */ 467602552SJunchao Zhang #include <petscpkg_version.h> 59d0448ceSStefano Zampini #include <petscsf.h> 6c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 87ee00b23SStefano Zampini #include <../src/mat/impls/sell/mpi/mpisell.h> 9cf053153SJunchao Zhang #include <petsc/private/vecimpl.h> 10397b6df1SKris Buschelman 119261f6e4SBarry Smith #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 129261f6e4SBarry Smith 13397b6df1SKris Buschelman EXTERN_C_BEGIN 14cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 15cf053153SJunchao Zhang #include <cmumps_c.h> 16cf053153SJunchao Zhang #include <zmumps_c.h> 17cf053153SJunchao Zhang #include <smumps_c.h> 18cf053153SJunchao Zhang #include <dmumps_c.h> 19cf053153SJunchao Zhang #else 20397b6df1SKris Buschelman #if defined(PETSC_USE_COMPLEX) 212907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 222907cef9SHong Zhang #include <cmumps_c.h> 23cf053153SJunchao Zhang #define MUMPS_c cmumps_c 24cf053153SJunchao Zhang #define MUMPS_STRUC_C CMUMPS_STRUC_C 25cf053153SJunchao Zhang #define MumpsScalar CMUMPS_COMPLEX 262907cef9SHong Zhang #else 27c6db04a5SJed Brown #include <zmumps_c.h> 28cf053153SJunchao Zhang #define MUMPS_c zmumps_c 29cf053153SJunchao Zhang #define MUMPS_STRUC_C ZMUMPS_STRUC_C 30cf053153SJunchao Zhang #define MumpsScalar ZMUMPS_COMPLEX 312907cef9SHong Zhang #endif 322907cef9SHong Zhang #else 332907cef9SHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 342907cef9SHong Zhang #include <smumps_c.h> 35cf053153SJunchao Zhang #define MUMPS_c smumps_c 36cf053153SJunchao Zhang #define MUMPS_STRUC_C SMUMPS_STRUC_C 37cf053153SJunchao Zhang #define MumpsScalar SMUMPS_REAL 38397b6df1SKris Buschelman #else 39c6db04a5SJed Brown 
#include <dmumps_c.h> 40cf053153SJunchao Zhang #define MUMPS_c dmumps_c 41cf053153SJunchao Zhang #define MUMPS_STRUC_C DMUMPS_STRUC_C 42cf053153SJunchao Zhang #define MumpsScalar DMUMPS_REAL 43cf053153SJunchao Zhang #endif 44397b6df1SKris Buschelman #endif 452907cef9SHong Zhang #endif 46397b6df1SKris Buschelman EXTERN_C_END 47cf053153SJunchao Zhang 48397b6df1SKris Buschelman #define JOB_INIT -1 49413bcc21SPierre Jolivet #define JOB_NULL 0 503d472b54SHong Zhang #define JOB_FACTSYMBOLIC 1 513d472b54SHong Zhang #define JOB_FACTNUMERIC 2 523d472b54SHong Zhang #define JOB_SOLVE 3 53397b6df1SKris Buschelman #define JOB_END -2 543d472b54SHong Zhang 55a6053eceSJunchao Zhang /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 56a6053eceSJunchao Zhang number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 57a6053eceSJunchao Zhang naming convention in PetscMPIInt, PetscBLASInt etc. 58a6053eceSJunchao Zhang */ 59a6053eceSJunchao Zhang typedef MUMPS_INT PetscMUMPSInt; 60a6053eceSJunchao Zhang 6167602552SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 6267602552SJunchao Zhang #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 63f0b74427SPierre Jolivet #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 6467602552SJunchao Zhang #endif 65a6053eceSJunchao Zhang #else 6667602552SJunchao Zhang #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 67f0b74427SPierre Jolivet #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 6867602552SJunchao Zhang #endif 6967602552SJunchao Zhang #endif 7067602552SJunchao Zhang 71a6053eceSJunchao Zhang #define MPIU_MUMPSINT MPI_INT 72a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MAX 2147483647 73a6053eceSJunchao Zhang #define PETSC_MUMPS_INT_MIN 
-2147483648 74a6053eceSJunchao Zhang 75a6053eceSJunchao Zhang /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 766497c311SBarry Smith static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b) 77d71ae5a4SJacob Faibussowitsch { 78a6053eceSJunchao Zhang PetscFunctionBegin; 79ece88022SPierre Jolivet #if PetscDefined(USE_64BIT_INDICES) 802c71b3e2SJacob Faibussowitsch PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 81ece88022SPierre Jolivet #endif 8257508eceSPierre Jolivet *b = (PetscMUMPSInt)a; 833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 84a6053eceSJunchao Zhang } 85a6053eceSJunchao Zhang 86a6053eceSJunchao Zhang /* Put these utility routines here since they are only used in this file */ 87ce78bad3SBarry Smith static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 88d71ae5a4SJacob Faibussowitsch { 89a6053eceSJunchao Zhang PetscInt myval; 90a6053eceSJunchao Zhang PetscBool myset; 914d86920dSPierre Jolivet 92a6053eceSJunchao Zhang PetscFunctionBegin; 93a6053eceSJunchao Zhang /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 949566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 959566063dSJacob Faibussowitsch if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 96a6053eceSJunchao Zhang if (set) *set = myset; 973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 98a6053eceSJunchao Zhang } 99a6053eceSJunchao Zhang #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 100a6053eceSJunchao Zhang 101cf053153SJunchao Zhang // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C. 102cf053153SJunchao Zhang // 103cf053153SJunchao Zhang // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type. 104cf053153SJunchao Zhang // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size 105cf053153SJunchao Zhang // arrays within the types are directly linked. At the end, we only need to copy ~20 intergers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall(). 106cf053153SJunchao Zhang // 107cf053153SJunchao Zhang // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface. 108cf053153SJunchao Zhang // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the the actual precision to be used. 109cf053153SJunchao Zhang // Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers. 
110cf053153SJunchao Zhang typedef struct { 111cf053153SJunchao Zhang PetscPrecision precision; // precision used by MUMPS 112cf053153SJunchao Zhang void *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex 113cf053153SJunchao Zhang 114cf053153SJunchao Zhang // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions. 115cf053153SJunchao Zhang MUMPS_INT sym, par, job; 116cf053153SJunchao Zhang MUMPS_INT comm_fortran; /* Fortran communicator */ 117cf053153SJunchao Zhang MUMPS_INT *icntl; 118cf053153SJunchao Zhang void *cntl; // MumpsReal, fixed size array 119cf053153SJunchao Zhang MUMPS_INT n; 120cf053153SJunchao Zhang MUMPS_INT nblk; 121cf053153SJunchao Zhang 122cf053153SJunchao Zhang /* Assembled entry */ 123cf053153SJunchao Zhang MUMPS_INT8 nnz; 124cf053153SJunchao Zhang MUMPS_INT *irn; 125cf053153SJunchao Zhang MUMPS_INT *jcn; 126cf053153SJunchao Zhang void *a; // MumpsScalar, centralized input 127cf053153SJunchao Zhang PetscCount a_len; 128cf053153SJunchao Zhang 129cf053153SJunchao Zhang /* Distributed entry */ 130cf053153SJunchao Zhang MUMPS_INT8 nnz_loc; 131cf053153SJunchao Zhang MUMPS_INT *irn_loc; 132cf053153SJunchao Zhang MUMPS_INT *jcn_loc; 133cf053153SJunchao Zhang void *a_loc; // MumpsScalar, distributed input 134cf053153SJunchao Zhang PetscCount a_loc_len; 135cf053153SJunchao Zhang 136cf053153SJunchao Zhang /* Matrix by blocks */ 137cf053153SJunchao Zhang MUMPS_INT *blkptr; 138cf053153SJunchao Zhang MUMPS_INT *blkvar; 139cf053153SJunchao Zhang 140cf053153SJunchao Zhang /* Ordering, if given by user */ 141cf053153SJunchao Zhang MUMPS_INT *perm_in; 142cf053153SJunchao Zhang 143cf053153SJunchao Zhang /* RHS, solution, ouptput data and statistics */ 144cf053153SJunchao Zhang void *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc; // MumpsScalar buffers 145cf053153SJunchao Zhang PetscCount rhs_len, 
redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar 146cf053153SJunchao Zhang 147cf053153SJunchao Zhang MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc; 148*7096bf6aSJunchao Zhang MUMPS_INT nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc; 149*7096bf6aSJunchao Zhang // MUMPS_INT nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0) 150cf053153SJunchao Zhang MUMPS_INT schur_lld; 151cf053153SJunchao Zhang MUMPS_INT *info, *infog; // fixed size array 152cf053153SJunchao Zhang void *rinfo, *rinfog; // MumpsReal, fixed size array 153cf053153SJunchao Zhang 154cf053153SJunchao Zhang /* Null space */ 155cf053153SJunchao Zhang MUMPS_INT *pivnul_list; // allocated by MUMPS! 156cf053153SJunchao Zhang MUMPS_INT *mapping; // allocated by MUMPS! 
157cf053153SJunchao Zhang 158cf053153SJunchao Zhang /* Schur */ 159cf053153SJunchao Zhang MUMPS_INT size_schur; 160cf053153SJunchao Zhang MUMPS_INT *listvar_schur; 161cf053153SJunchao Zhang void *schur; // MumpsScalar 162cf053153SJunchao Zhang PetscCount schur_len; 163cf053153SJunchao Zhang 164cf053153SJunchao Zhang /* For out-of-core */ 165cf053153SJunchao Zhang char *ooc_tmpdir; // fixed size array 166cf053153SJunchao Zhang char *ooc_prefix; // fixed size array 167cf053153SJunchao Zhang } XMUMPS_STRUC_C; 168cf053153SJunchao Zhang 169cf053153SJunchao Zhang // Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct 170cf053153SJunchao Zhang #define AllocatInternalID(MUMPS_STRUC_T, outer) \ 171cf053153SJunchao Zhang do { \ 172cf053153SJunchao Zhang MUMPS_STRUC_T *inner; \ 173cf053153SJunchao Zhang PetscCall(PetscNew(&inner)); \ 174cf053153SJunchao Zhang outer->icntl = inner->icntl; \ 175cf053153SJunchao Zhang outer->cntl = inner->cntl; \ 176cf053153SJunchao Zhang outer->info = inner->info; \ 177cf053153SJunchao Zhang outer->infog = inner->infog; \ 178cf053153SJunchao Zhang outer->rinfo = inner->rinfo; \ 179cf053153SJunchao Zhang outer->rinfog = inner->rinfog; \ 180cf053153SJunchao Zhang outer->ooc_tmpdir = inner->ooc_tmpdir; \ 181cf053153SJunchao Zhang outer->ooc_prefix = inner->ooc_prefix; \ 182cf053153SJunchao Zhang /* the three field should never change after init */ \ 183cf053153SJunchao Zhang inner->comm_fortran = outer->comm_fortran; \ 184cf053153SJunchao Zhang inner->par = outer->par; \ 185cf053153SJunchao Zhang inner->sym = outer->sym; \ 186cf053153SJunchao Zhang outer->internal_id = inner; \ 187cf053153SJunchao Zhang } while (0) 188cf053153SJunchao Zhang 189cf053153SJunchao Zhang // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner 190cf053153SJunchao Zhang static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision 
precision) 191cf053153SJunchao Zhang { 192cf053153SJunchao Zhang PetscFunctionBegin; 193cf053153SJunchao Zhang outer->precision = precision; 194cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 195cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 196cf053153SJunchao Zhang if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer); 197cf053153SJunchao Zhang else AllocatInternalID(ZMUMPS_STRUC_C, outer); 198cf053153SJunchao Zhang #else 199cf053153SJunchao Zhang if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer); 200cf053153SJunchao Zhang else AllocatInternalID(DMUMPS_STRUC_C, outer); 201cf053153SJunchao Zhang #endif 202cf053153SJunchao Zhang #else 203cf053153SJunchao Zhang AllocatInternalID(MUMPS_STRUC_C, outer); 204cf053153SJunchao Zhang #endif 205cf053153SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 206cf053153SJunchao Zhang } 207cf053153SJunchao Zhang 208cf053153SJunchao Zhang #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \ 209cf053153SJunchao Zhang do { \ 210cf053153SJunchao Zhang MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \ 211cf053153SJunchao Zhang PetscCall(PetscFree(inner->a)); \ 212cf053153SJunchao Zhang PetscCall(PetscFree(inner->a_loc)); \ 213cf053153SJunchao Zhang PetscCall(PetscFree(inner->redrhs)); \ 214cf053153SJunchao Zhang PetscCall(PetscFree(inner->rhs)); \ 215cf053153SJunchao Zhang PetscCall(PetscFree(inner->rhs_sparse)); \ 216cf053153SJunchao Zhang PetscCall(PetscFree(inner->rhs_loc)); \ 217cf053153SJunchao Zhang PetscCall(PetscFree(inner->sol_loc)); \ 218cf053153SJunchao Zhang PetscCall(PetscFree(inner->schur)); \ 219cf053153SJunchao Zhang } while (0) 220cf053153SJunchao Zhang 221cf053153SJunchao Zhang static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer) 222cf053153SJunchao Zhang { 223cf053153SJunchao Zhang PetscFunctionBegin; 224cf053153SJunchao Zhang if (outer->internal_id) { // sometimes, the inner is never created 
before we destroy the outer 225cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 226cf053153SJunchao Zhang const PetscPrecision mumps_precision = outer->precision; 227cf053153SJunchao Zhang if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision 228cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 229cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer); 230cf053153SJunchao Zhang else FreeInternalIDFields(ZMUMPS_STRUC_C, outer); 231cf053153SJunchao Zhang #else 232cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer); 233cf053153SJunchao Zhang else FreeInternalIDFields(DMUMPS_STRUC_C, outer); 234cf053153SJunchao Zhang #endif 235cf053153SJunchao Zhang } 236cf053153SJunchao Zhang #endif 237cf053153SJunchao Zhang PetscCall(PetscFree(outer->internal_id)); 238cf053153SJunchao Zhang } 239cf053153SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 240cf053153SJunchao Zhang } 241cf053153SJunchao Zhang 242cf053153SJunchao Zhang // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>. 243cf053153SJunchao Zhang // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array. 244cf053153SJunchao Zhang // (For bravity, we use <ma> for array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>) 245cf053153SJunchao Zhang // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request. 246cf053153SJunchao Zhang // 247cf053153SJunchao Zhang // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same. 
248cf053153SJunchao Zhang // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output. 249cf053153SJunchao Zhang // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output. 250cf053153SJunchao Zhang // 251cf053153SJunchao Zhang // 252cf053153SJunchao Zhang // Input parameters: 253cf053153SJunchao Zhang // + convert - whether to do PetscScalar to MumpsScalar conversion 254cf053153SJunchao Zhang // . n - length of the PetscScalar array 255cf053153SJunchao Zhang // . pa - [n]], points to the PetscScalar array 256cf053153SJunchao Zhang // . precision - precision of MumpsScalar 257cf053153SJunchao Zhang // . m - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero. 258cf053153SJunchao Zhang // - ma - on input, an existing MumpsScalar array if any. 259cf053153SJunchao Zhang // 260cf053153SJunchao Zhang // Output parameters: 261cf053153SJunchao Zhang // + m - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0 262cf053153SJunchao Zhang // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same. 263cf053153SJunchao Zhang // 264cf053153SJunchao Zhang // Note: 265cf053153SJunchao Zhang // New memory, if allocated, is done via PetscMalloc1(), and is owned by caller. 266cf053153SJunchao Zhang static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma) 267cf053153SJunchao Zhang { 268cf053153SJunchao Zhang PetscFunctionBegin; 269cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 270cf053153SJunchao Zhang const PetscPrecision mumps_precision = precision; 271cf053153SJunchao Zhang PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precicison (%d). 
Must be single or double", (int)precision); 272cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 273cf053153SJunchao Zhang if (mumps_precision != PETSC_SCALAR_PRECISION) { 274cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) { 275cf053153SJunchao Zhang if (*m < n) { 276cf053153SJunchao Zhang PetscCall(PetscFree(*ma)); 277cf053153SJunchao Zhang PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma)); 278cf053153SJunchao Zhang *m = n; 279cf053153SJunchao Zhang } 280cf053153SJunchao Zhang if (convert) { 281cf053153SJunchao Zhang CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma; 282cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) { 283cf053153SJunchao Zhang b[i].r = PetscRealPart(pa[i]); 284cf053153SJunchao Zhang b[i].i = PetscImaginaryPart(pa[i]); 285cf053153SJunchao Zhang }; 286cf053153SJunchao Zhang } 287cf053153SJunchao Zhang } else { 288cf053153SJunchao Zhang if (*m < n) { 289cf053153SJunchao Zhang PetscCall(PetscFree(*ma)); 290cf053153SJunchao Zhang PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma)); 291cf053153SJunchao Zhang *m = n; 292cf053153SJunchao Zhang } 293cf053153SJunchao Zhang if (convert) { 294cf053153SJunchao Zhang ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma; 295cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) { 296cf053153SJunchao Zhang b[i].r = PetscRealPart(pa[i]); 297cf053153SJunchao Zhang b[i].i = PetscImaginaryPart(pa[i]); 298cf053153SJunchao Zhang } 299cf053153SJunchao Zhang } 300cf053153SJunchao Zhang } 301cf053153SJunchao Zhang } 302cf053153SJunchao Zhang #else 303cf053153SJunchao Zhang if (mumps_precision != PETSC_SCALAR_PRECISION) { 304cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) { 305cf053153SJunchao Zhang if (*m < n) { 306cf053153SJunchao Zhang PetscCall(PetscFree(*ma)); 307cf053153SJunchao Zhang PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma)); 308cf053153SJunchao Zhang *m = n; 309cf053153SJunchao Zhang } 310cf053153SJunchao Zhang if (convert) { 311cf053153SJunchao Zhang SMUMPS_REAL *b = 
*(SMUMPS_REAL **)ma; 312cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 313cf053153SJunchao Zhang } 314cf053153SJunchao Zhang } else { 315cf053153SJunchao Zhang if (*m < n) { 316cf053153SJunchao Zhang PetscCall(PetscFree(*ma)); 317cf053153SJunchao Zhang PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma)); 318cf053153SJunchao Zhang *m = n; 319cf053153SJunchao Zhang } 320cf053153SJunchao Zhang if (convert) { 321cf053153SJunchao Zhang DMUMPS_REAL *b = *(DMUMPS_REAL **)ma; 322cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 323cf053153SJunchao Zhang } 324cf053153SJunchao Zhang } 325cf053153SJunchao Zhang } 326cf053153SJunchao Zhang #endif 327cf053153SJunchao Zhang else 328cf053153SJunchao Zhang #endif 329cf053153SJunchao Zhang { 330cf053153SJunchao Zhang if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any 331cf053153SJunchao Zhang *ma = (void *)pa; // same precision, make them alias 332cf053153SJunchao Zhang *m = 0; 333cf053153SJunchao Zhang } 334cf053153SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 335cf053153SJunchao Zhang } 336cf053153SJunchao Zhang 337cf053153SJunchao Zhang // Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>. 338cf053153SJunchao Zhang // 339cf053153SJunchao Zhang // 1) If the two types are different, cast array elements. 340cf053153SJunchao Zhang // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 
341cf053153SJunchao Zhang static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa) 342cf053153SJunchao Zhang { 343cf053153SJunchao Zhang PetscFunctionBegin; 344cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 345cf053153SJunchao Zhang if (mumps_precision != PETSC_SCALAR_PRECISION) { 346cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 347cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) { 348cf053153SJunchao Zhang PetscReal *a = (PetscReal *)pa; 349cf053153SJunchao Zhang const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 350cf053153SJunchao Zhang for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 351cf053153SJunchao Zhang } else { 352cf053153SJunchao Zhang PetscReal *a = (PetscReal *)pa; 353cf053153SJunchao Zhang const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 354cf053153SJunchao Zhang for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 355cf053153SJunchao Zhang } 356cf053153SJunchao Zhang #else 357cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) { 358cf053153SJunchao Zhang const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 359cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 360cf053153SJunchao Zhang } else { 361cf053153SJunchao Zhang const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 362cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 363cf053153SJunchao Zhang } 364cf053153SJunchao Zhang #endif 365cf053153SJunchao Zhang } else 366cf053153SJunchao Zhang #endif 367cf053153SJunchao Zhang PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n)); 368cf053153SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 369cf053153SJunchao Zhang } 370cf053153SJunchao Zhang 371cf053153SJunchao Zhang // Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>. 
372cf053153SJunchao Zhang // 373cf053153SJunchao Zhang // 1) If the two types are different, cast array elements. 374cf053153SJunchao Zhang // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 375cf053153SJunchao Zhang static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma) 376cf053153SJunchao Zhang { 377cf053153SJunchao Zhang PetscFunctionBegin; 378cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 379cf053153SJunchao Zhang if (mumps_precision != PETSC_SCALAR_PRECISION) { 380cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 381cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) { 382cf053153SJunchao Zhang CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma; 383cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) { 384cf053153SJunchao Zhang b[i].r = PetscRealPart(pa[i]); 385cf053153SJunchao Zhang b[i].i = PetscImaginaryPart(pa[i]); 386cf053153SJunchao Zhang } 387cf053153SJunchao Zhang } else { 388cf053153SJunchao Zhang ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma; 389cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) { 390cf053153SJunchao Zhang b[i].r = PetscRealPart(pa[i]); 391cf053153SJunchao Zhang b[i].i = PetscImaginaryPart(pa[i]); 392cf053153SJunchao Zhang } 393cf053153SJunchao Zhang } 394cf053153SJunchao Zhang #else 395cf053153SJunchao Zhang if (mumps_precision == PETSC_PRECISION_SINGLE) { 396cf053153SJunchao Zhang SMUMPS_REAL *b = (SMUMPS_REAL *)ma; 397cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 398cf053153SJunchao Zhang } else { 399cf053153SJunchao Zhang DMUMPS_REAL *b = (DMUMPS_REAL *)ma; 400cf053153SJunchao Zhang for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 401cf053153SJunchao Zhang } 402cf053153SJunchao Zhang #endif 403cf053153SJunchao Zhang } else 404cf053153SJunchao Zhang #endif 405cf053153SJunchao Zhang PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar 
*)pa, n)); 406cf053153SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 407cf053153SJunchao Zhang } 408cf053153SJunchao Zhang 409cf053153SJunchao Zhang static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id) 410cf053153SJunchao Zhang { 411cf053153SJunchao Zhang return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT; 412cf053153SJunchao Zhang } 413cf053153SJunchao Zhang 414cf053153SJunchao Zhang #define PreMumpsCall(inner, outer, mumpsscalar) \ 415cf053153SJunchao Zhang do { \ 416cf053153SJunchao Zhang inner->job = outer->job; \ 417cf053153SJunchao Zhang inner->n = outer->n; \ 418cf053153SJunchao Zhang inner->nblk = outer->nblk; \ 419cf053153SJunchao Zhang inner->nnz = outer->nnz; \ 420cf053153SJunchao Zhang inner->irn = outer->irn; \ 421cf053153SJunchao Zhang inner->jcn = outer->jcn; \ 422cf053153SJunchao Zhang inner->a = (mumpsscalar *)outer->a; \ 423cf053153SJunchao Zhang inner->nnz_loc = outer->nnz_loc; \ 424cf053153SJunchao Zhang inner->irn_loc = outer->irn_loc; \ 425cf053153SJunchao Zhang inner->jcn_loc = outer->jcn_loc; \ 426cf053153SJunchao Zhang inner->a_loc = (mumpsscalar *)outer->a_loc; \ 427cf053153SJunchao Zhang inner->blkptr = outer->blkptr; \ 428cf053153SJunchao Zhang inner->blkvar = outer->blkvar; \ 429cf053153SJunchao Zhang inner->perm_in = outer->perm_in; \ 430cf053153SJunchao Zhang inner->rhs = (mumpsscalar *)outer->rhs; \ 431cf053153SJunchao Zhang inner->redrhs = (mumpsscalar *)outer->redrhs; \ 432cf053153SJunchao Zhang inner->rhs_sparse = (mumpsscalar *)outer->rhs_sparse; \ 433cf053153SJunchao Zhang inner->sol_loc = (mumpsscalar *)outer->sol_loc; \ 434cf053153SJunchao Zhang inner->rhs_loc = (mumpsscalar *)outer->rhs_loc; \ 435cf053153SJunchao Zhang inner->irhs_sparse = outer->irhs_sparse; \ 436cf053153SJunchao Zhang inner->irhs_ptr = outer->irhs_ptr; \ 437cf053153SJunchao Zhang inner->isol_loc = outer->isol_loc; \ 438cf053153SJunchao Zhang inner->irhs_loc = outer->irhs_loc; \ 439cf053153SJunchao Zhang inner->nrhs = 
outer->nrhs; \ 440cf053153SJunchao Zhang inner->lrhs = outer->lrhs; \ 441cf053153SJunchao Zhang inner->lredrhs = outer->lredrhs; \ 442cf053153SJunchao Zhang inner->nz_rhs = outer->nz_rhs; \ 443cf053153SJunchao Zhang inner->lsol_loc = outer->lsol_loc; \ 444cf053153SJunchao Zhang inner->nloc_rhs = outer->nloc_rhs; \ 445cf053153SJunchao Zhang inner->lrhs_loc = outer->lrhs_loc; \ 446cf053153SJunchao Zhang inner->schur_lld = outer->schur_lld; \ 447cf053153SJunchao Zhang inner->size_schur = outer->size_schur; \ 448cf053153SJunchao Zhang inner->listvar_schur = outer->listvar_schur; \ 449cf053153SJunchao Zhang inner->schur = (mumpsscalar *)outer->schur; \ 450cf053153SJunchao Zhang } while (0) 451cf053153SJunchao Zhang 452cf053153SJunchao Zhang #define PostMumpsCall(inner, outer) \ 453cf053153SJunchao Zhang do { \ 454cf053153SJunchao Zhang outer->pivnul_list = inner->pivnul_list; \ 455cf053153SJunchao Zhang outer->mapping = inner->mapping; \ 456cf053153SJunchao Zhang } while (0) 457cf053153SJunchao Zhang 458cf053153SJunchao Zhang // Entry for PETSc to call mumps 459cf053153SJunchao Zhang static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer) 460cf053153SJunchao Zhang { 461cf053153SJunchao Zhang PetscFunctionBegin; 462cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 463cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 464cf053153SJunchao Zhang if (outer->precision == PETSC_PRECISION_SINGLE) { 465cf053153SJunchao Zhang CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id; 466cf053153SJunchao Zhang PreMumpsCall(inner, outer, CMUMPS_COMPLEX); 467cf053153SJunchao Zhang PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner)); 468cf053153SJunchao Zhang PostMumpsCall(inner, outer); 469cf053153SJunchao Zhang } else { 470cf053153SJunchao Zhang ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id; 471cf053153SJunchao Zhang PreMumpsCall(inner, outer, ZMUMPS_COMPLEX); 472cf053153SJunchao Zhang 
PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner)); 473cf053153SJunchao Zhang PostMumpsCall(inner, outer); 474cf053153SJunchao Zhang } 475cf053153SJunchao Zhang #else 476cf053153SJunchao Zhang if (outer->precision == PETSC_PRECISION_SINGLE) { 477cf053153SJunchao Zhang SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id; 478cf053153SJunchao Zhang PreMumpsCall(inner, outer, SMUMPS_REAL); 479cf053153SJunchao Zhang PetscStackCallExternalVoid("smumps_c", smumps_c(inner)); 480cf053153SJunchao Zhang PostMumpsCall(inner, outer); 481cf053153SJunchao Zhang } else { 482cf053153SJunchao Zhang DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id; 483cf053153SJunchao Zhang PreMumpsCall(inner, outer, DMUMPS_REAL); 484cf053153SJunchao Zhang PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner)); 485cf053153SJunchao Zhang PostMumpsCall(inner, outer); 486cf053153SJunchao Zhang } 487cf053153SJunchao Zhang #endif 488cf053153SJunchao Zhang #else 489cf053153SJunchao Zhang MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id; 490cf053153SJunchao Zhang PreMumpsCall(inner, outer, MumpsScalar); 491cf053153SJunchao Zhang PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner)); 492cf053153SJunchao Zhang PostMumpsCall(inner, outer); 493cf053153SJunchao Zhang #endif 494cf053153SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 495cf053153SJunchao Zhang } 496cf053153SJunchao Zhang 497cf053153SJunchao Zhang /* macros s.t. indices match MUMPS documentation */ 498cf053153SJunchao Zhang #define ICNTL(I) icntl[(I) - 1] 499cf053153SJunchao Zhang #define INFOG(I) infog[(I) - 1] 500cf053153SJunchao Zhang #define INFO(I) info[(I) - 1] 501cf053153SJunchao Zhang 502cf053153SJunchao Zhang // Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I! 
503cf053153SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 504cf053153SJunchao Zhang #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i) 505cf053153SJunchao Zhang #else 506cf053153SJunchao Zhang #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I]) 507cf053153SJunchao Zhang #endif 508cf053153SJunchao Zhang 509cf053153SJunchao Zhang // Get a value from MumpsReal arrays. The value is convertible to PetscReal. 510cf053153SJunchao Zhang #define ID_CNTL_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1]) 511cf053153SJunchao Zhang #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1]) 512cf053153SJunchao Zhang #define ID_RINFO_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1]) 513cf053153SJunchao Zhang 514cf053153SJunchao Zhang // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL> 515cf053153SJunchao Zhang #define ID_CNTL_SET(ID, I, VAL) \ 516cf053153SJunchao Zhang do { \ 517cf053153SJunchao Zhang if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \ 518cf053153SJunchao Zhang else ((double *)(ID).cntl)[(I) - 1] = (VAL); \ 519cf053153SJunchao Zhang } while (0) 520cf053153SJunchao Zhang 521217d3b1eSJunchao Zhang /* if using PETSc OpenMP support, we only call MUMPS on master ranks. 
Before/after the call, we change/restore CPUs the master ranks can run on */ 5223ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 5233ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 5243ab56b82SJunchao Zhang do { \ 5253ab56b82SJunchao Zhang if (mumps->use_petsc_omp_support) { \ 5263ab56b82SJunchao Zhang if (mumps->is_omp_master) { \ 5279566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 52814ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 529cf053153SJunchao Zhang PetscCall(PetscCallMumps_Private(&mumps->id)); \ 53014ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 5319566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 5323ab56b82SJunchao Zhang } \ 5339566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 534c3714a1dSJunchao Zhang /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 535c3714a1dSJunchao Zhang to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 536c3714a1dSJunchao Zhang an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 537c3714a1dSJunchao Zhang omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. 
\ 538c3714a1dSJunchao Zhang */ \ 539cf053153SJunchao Zhang SMUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \ 540cf053153SJunchao Zhang PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 541cf053153SJunchao Zhang PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 542cf053153SJunchao Zhang PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 543cf053153SJunchao Zhang PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 5443ab56b82SJunchao Zhang } else { \ 54514ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 546cf053153SJunchao Zhang PetscCall(PetscCallMumps_Private(&mumps->id)); \ 54714ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 5483ab56b82SJunchao Zhang } \ 5493ab56b82SJunchao Zhang } while (0) 5503ab56b82SJunchao Zhang #else 5513ab56b82SJunchao Zhang #define PetscMUMPS_c(mumps) \ 552d71ae5a4SJacob Faibussowitsch do { \ 55314ffdc6fSStefano Zampini PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 554cf053153SJunchao Zhang PetscCall(PetscCallMumps_Private(&mumps->id)); \ 55514ffdc6fSStefano Zampini PetscCall(PetscFPTrapPop()); \ 556d71ae5a4SJacob Faibussowitsch } while (0) 5573ab56b82SJunchao Zhang #endif 5583ab56b82SJunchao Zhang 559a6053eceSJunchao Zhang typedef struct Mat_MUMPS Mat_MUMPS; 560a6053eceSJunchao Zhang struct Mat_MUMPS { 561cf053153SJunchao Zhang XMUMPS_STRUC_C id; 5622907cef9SHong Zhang 563397b6df1SKris Buschelman MatStructure matstruc; 5642d4298aeSJunchao Zhang PetscMPIInt myid, petsc_size; 565a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. 
*/ 566a6053eceSJunchao Zhang PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 5676497c311SBarry Smith PetscCount nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 568a6053eceSJunchao Zhang PetscMUMPSInt sym; 5692d4298aeSJunchao Zhang MPI_Comm mumps_comm; 570413bcc21SPierre Jolivet PetscMUMPSInt *ICNTL_pre; 571413bcc21SPierre Jolivet PetscReal *CNTL_pre; 572a6053eceSJunchao Zhang PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 573801fbe65SHong Zhang VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 57425aac85cSJunchao Zhang PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 575cf053153SJunchao Zhang PetscMUMPSInt ICNTL26; 57667602552SJunchao Zhang PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 57767602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 57867602552SJunchao Zhang PetscInt *rhs_nrow, max_nrhs; 57967602552SJunchao Zhang PetscMPIInt *rhs_recvcounts, *rhs_disps; 58067602552SJunchao Zhang PetscScalar *rhs_loc, *rhs_recvbuf; 58167602552SJunchao Zhang #endif 582801fbe65SHong Zhang Vec b_seq, x_seq; 583a6053eceSJunchao Zhang PetscInt ninfo, *info; /* which INFO to display */ 584b5fa320bSStefano Zampini PetscInt sizeredrhs; 58559ac8732SStefano Zampini PetscScalar *schur_sol; 58659ac8732SStefano Zampini PetscInt schur_sizesol; 587cf053153SJunchao Zhang PetscScalar *redrhs; // buffer in PetscScalar in case MumpsScalar is in a different precision 588a6053eceSJunchao Zhang PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 5896497c311SBarry Smith PetscCount cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 590a6053eceSJunchao Zhang PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 5912205254eSKarl Rupp 5929d0448ceSStefano Zampini /* Support for MATNEST */ 5939d0448ceSStefano 
Zampini PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 5946497c311SBarry Smith PetscCount *nest_vals_start; 5959d0448ceSStefano Zampini PetscScalar *nest_vals; 5969d0448ceSStefano Zampini 597a6053eceSJunchao Zhang /* stuff used by petsc/mumps OpenMP support*/ 5983ab56b82SJunchao Zhang PetscBool use_petsc_omp_support; 599da81f932SPierre Jolivet PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 600f0b74427SPierre Jolivet MPI_Comm petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */ 6016497c311SBarry Smith PetscCount *recvcount; /* a collection of nnz on omp_master */ 602a6053eceSJunchao Zhang PetscMPIInt tag, omp_comm_size; 6033ab56b82SJunchao Zhang PetscBool is_omp_master; /* is this rank the master of omp_comm */ 604a6053eceSJunchao Zhang MPI_Request *reqs; 605a6053eceSJunchao Zhang }; 6063ab56b82SJunchao Zhang 607a6053eceSJunchao Zhang /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 608a6053eceSJunchao Zhang Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 609a6053eceSJunchao Zhang */ 610d2a308c1SPierre Jolivet static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 611d71ae5a4SJacob Faibussowitsch { 6126497c311SBarry Smith PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */ 613f0c56d0fSKris Buschelman 614a6053eceSJunchao Zhang PetscFunctionBegin; 615a6053eceSJunchao Zhang #if defined(PETSC_USE_64BIT_INDICES) 616a6053eceSJunchao Zhang { 617a6053eceSJunchao Zhang PetscInt i; 618a6053eceSJunchao Zhang if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 6199566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 6209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 621a6053eceSJunchao Zhang mumps->cur_ilen = nrow + 1; 622a6053eceSJunchao Zhang } 623a6053eceSJunchao Zhang if (nnz > mumps->cur_jlen) { 6249566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 6259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 626a6053eceSJunchao Zhang mumps->cur_jlen = nnz; 627a6053eceSJunchao Zhang } 628f4f49eeaSPierre Jolivet for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i])); 629f4f49eeaSPierre Jolivet for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i])); 630a6053eceSJunchao Zhang *ia_mumps = mumps->ia_alloc; 631a6053eceSJunchao Zhang *ja_mumps = mumps->ja_alloc; 632a6053eceSJunchao Zhang } 633a6053eceSJunchao Zhang #else 634a6053eceSJunchao Zhang *ia_mumps = ia; 635a6053eceSJunchao Zhang *ja_mumps = ja; 636a6053eceSJunchao Zhang #endif 6379566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 6383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 639a6053eceSJunchao Zhang } 640b24902e0SBarry Smith 641d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 642d71ae5a4SJacob Faibussowitsch { 643b5fa320bSStefano Zampini PetscFunctionBegin; 6449566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.listvar_schur)); 645cf053153SJunchao Zhang PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in 
MatMumpsFreeInternalID() 6469566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 64759ac8732SStefano Zampini mumps->id.size_schur = 0; 648b3cb21ddSStefano Zampini mumps->id.schur_lld = 0; 649cf053153SJunchao Zhang if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is yet built 6503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 65159ac8732SStefano Zampini } 65259ac8732SStefano Zampini 653b3cb21ddSStefano Zampini /* solve with rhs in mumps->id.redrhs and return in the same location */ 654d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 655d71ae5a4SJacob Faibussowitsch { 656b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 657cf053153SJunchao Zhang Mat S, B, X; // solve S*X = B; all three matrices are dense 658b3cb21ddSStefano Zampini MatFactorSchurStatus schurstatus; 659b3cb21ddSStefano Zampini PetscInt sizesol; 660cf053153SJunchao Zhang const PetscScalar *xarray; 66159ac8732SStefano Zampini 66259ac8732SStefano Zampini PetscFunctionBegin; 6639566063dSJacob Faibussowitsch PetscCall(MatFactorFactorizeSchurComplement(F)); 6649566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 665cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs)); 666cf053153SJunchao Zhang 667cf053153SJunchao Zhang PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B)); 6689566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 669a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6709566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(B, S->boundtocpu)); 671a3d589ffSStefano Zampini #endif 672b3cb21ddSStefano Zampini switch (schurstatus) { 673d71ae5a4SJacob Faibussowitsch case MAT_FACTOR_SCHUR_FACTORED: 674cf053153SJunchao Zhang 
PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X)); 675d71ae5a4SJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 676a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6779566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, S->boundtocpu)); 678a3d589ffSStefano Zampini #endif 679b3cb21ddSStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 6809566063dSJacob Faibussowitsch PetscCall(MatMatSolveTranspose(S, B, X)); 68159ac8732SStefano Zampini } else { 6829566063dSJacob Faibussowitsch PetscCall(MatMatSolve(S, B, X)); 68359ac8732SStefano Zampini } 684b3cb21ddSStefano Zampini break; 685b3cb21ddSStefano Zampini case MAT_FACTOR_SCHUR_INVERTED: 686b3cb21ddSStefano Zampini sizesol = mumps->id.nrhs * mumps->id.size_schur; 68759ac8732SStefano Zampini if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 6889566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->schur_sol)); 6899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 69059ac8732SStefano Zampini mumps->schur_sizesol = sizesol; 691b5fa320bSStefano Zampini } 6929566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 6939566063dSJacob Faibussowitsch PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 694a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6959566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(X, S->boundtocpu)); 696a3d589ffSStefano Zampini #endif 6979566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 69859ac8732SStefano Zampini if (!mumps->id.ICNTL(9)) { /* transpose solve */ 6999566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 700b5fa320bSStefano Zampini } else { 7019566063dSJacob Faibussowitsch PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 702b5fa320bSStefano 
Zampini } 7039566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(X)); 7049566063dSJacob Faibussowitsch PetscCall(MatProductSymbolic(X)); 7059566063dSJacob Faibussowitsch PetscCall(MatProductNumeric(X)); 7064417c5e8SHong Zhang 7079566063dSJacob Faibussowitsch PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 708b3cb21ddSStefano Zampini break; 709d71ae5a4SJacob Faibussowitsch default: 710d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 71159ac8732SStefano Zampini } 712cf053153SJunchao Zhang // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device. 713cf053153SJunchao Zhang PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs. 714cf053153SJunchao Zhang PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs)); 715cf053153SJunchao Zhang PetscCall(MatDenseRestoreArrayRead(X, &xarray)); 7169566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 7179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 7189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&X)); 7193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 720b5fa320bSStefano Zampini } 721b5fa320bSStefano Zampini 722d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 723d71ae5a4SJacob Faibussowitsch { 724b3cb21ddSStefano Zampini Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 725b5fa320bSStefano Zampini 726b5fa320bSStefano Zampini PetscFunctionBegin; 727b5fa320bSStefano Zampini if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 7283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 729b5fa320bSStefano Zampini } 730b8f61ee1SStefano Zampini if (!expansion) { /* 
prepare for the condensation step */ 731b5fa320bSStefano Zampini PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 732b5fa320bSStefano Zampini /* allocate MUMPS internal array to store reduced right-hand sides */ 733b5fa320bSStefano Zampini if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 734b5fa320bSStefano Zampini mumps->id.lredrhs = mumps->id.size_schur; 735b5fa320bSStefano Zampini mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 736cf053153SJunchao Zhang if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); 737cf053153SJunchao Zhang PetscCall(PetscFree(mumps->redrhs)); 738cf053153SJunchao Zhang PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs)); 739cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs)); 740b5fa320bSStefano Zampini } 741b5fa320bSStefano Zampini } else { /* prepare for the expansion step */ 742cf053153SJunchao Zhang PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */ 743b5fa320bSStefano Zampini mumps->id.ICNTL(26) = 2; /* expansion phase */ 7443ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 745cf053153SJunchao Zhang PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 746b5fa320bSStefano Zampini /* restore defaults */ 747b5fa320bSStefano Zampini mumps->id.ICNTL(26) = -1; 748d3d598ffSStefano Zampini /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 749d3d598ffSStefano Zampini if (mumps->id.nrhs > 1) { 750cf053153SJunchao Zhang if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); 751cf053153SJunchao Zhang PetscCall(PetscFree(mumps->redrhs)); 752cf053153SJunchao Zhang mumps->id.redrhs_len = 0; 
753d3d598ffSStefano Zampini mumps->id.lredrhs = 0; 754d3d598ffSStefano Zampini mumps->sizeredrhs = 0; 755d3d598ffSStefano Zampini } 756b5fa320bSStefano Zampini } 7573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 758b5fa320bSStefano Zampini } 759b5fa320bSStefano Zampini 760397b6df1SKris Buschelman /* 761f0b74427SPierre Jolivet MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz] 762d341cd04SHong Zhang 763397b6df1SKris Buschelman input: 76475480915SPierre Jolivet A - matrix in aij,baij or sbaij format 765397b6df1SKris Buschelman shift - 0: C style output triple; 1: Fortran style output triple. 766bccb9932SShri Abhyankar reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 767bccb9932SShri Abhyankar MAT_REUSE_MATRIX: only the values in v array are updated 768397b6df1SKris Buschelman output: 769397b6df1SKris Buschelman nnz - dim of r, c, and v (number of local nonzero entries of A) 770397b6df1SKris Buschelman r, c, v - row and col index, matrix values (matrix triples) 771eb9baa12SBarry Smith 772eb9baa12SBarry Smith The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 7737ee00b23SStefano Zampini freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 774eb9baa12SBarry Smith that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
775eb9baa12SBarry Smith 776397b6df1SKris Buschelman */ 77716ebf90aSShri Abhyankar 77866976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 779d71ae5a4SJacob Faibussowitsch { 780a3d589ffSStefano Zampini const PetscScalar *av; 781185f6596SHong Zhang const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 7826497c311SBarry Smith PetscCount nz, rnz, k; 783a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 78416ebf90aSShri Abhyankar Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 785397b6df1SKris Buschelman 786397b6df1SKris Buschelman PetscFunctionBegin; 7879566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 788bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 7892205254eSKarl Rupp nz = aa->nz; 7902205254eSKarl Rupp ai = aa->i; 7912205254eSKarl Rupp aj = aa->j; 7929566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 7936497c311SBarry Smith for (PetscCount i = k = 0; i < M; i++) { 79416ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 79567877ebaSShri Abhyankar ajj = aj + ai[i]; 7966497c311SBarry Smith for (PetscCount j = 0; j < rnz; j++) { 7979566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 7989566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 799a6053eceSJunchao Zhang k++; 80016ebf90aSShri Abhyankar } 80116ebf90aSShri Abhyankar } 80250c845baSStefano Zampini mumps->val = (PetscScalar *)av; 803a6053eceSJunchao Zhang mumps->irn = row; 804a6053eceSJunchao Zhang mumps->jcn = col; 805a6053eceSJunchao Zhang mumps->nnz = nz; 806127cd276SPierre Jolivet } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */ 807127cd276SPierre Jolivet else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs 
to update the mumps->val pointer */ 8089566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 8093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 81016ebf90aSShri Abhyankar } 811397b6df1SKris Buschelman 81266976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 813d71ae5a4SJacob Faibussowitsch { 8146497c311SBarry Smith PetscCount nz, i, j, k, r; 8157ee00b23SStefano Zampini Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 816a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 8177ee00b23SStefano Zampini 8187ee00b23SStefano Zampini PetscFunctionBegin; 8197ee00b23SStefano Zampini nz = a->sliidx[a->totalslices]; 82050c845baSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 8219566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 822a6053eceSJunchao Zhang for (i = k = 0; i < a->totalslices; i++) { 82348a46eb9SPierre Jolivet for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 8247ee00b23SStefano Zampini } 8259566063dSJacob Faibussowitsch for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 826a6053eceSJunchao Zhang mumps->irn = row; 827a6053eceSJunchao Zhang mumps->jcn = col; 828a6053eceSJunchao Zhang mumps->nnz = nz; 82950c845baSStefano Zampini mumps->val = a->val; 830127cd276SPierre Jolivet } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */ 831127cd276SPierre Jolivet else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 8323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8337ee00b23SStefano Zampini } 8347ee00b23SStefano Zampini 83566976f2fSJacob 
Faibussowitsch static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 836d71ae5a4SJacob Faibussowitsch { 83767877ebaSShri Abhyankar Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 83833d57670SJed Brown const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 8396497c311SBarry Smith PetscCount M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 840a6053eceSJunchao Zhang PetscInt bs; 841a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 84267877ebaSShri Abhyankar 84367877ebaSShri Abhyankar PetscFunctionBegin; 84450c845baSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 8459566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 84633d57670SJed Brown M = A->rmap->N / bs; 8479371c9d4SSatish Balay ai = aa->i; 8489371c9d4SSatish Balay aj = aa->j; 8499566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 85067877ebaSShri Abhyankar for (i = 0; i < M; i++) { 85167877ebaSShri Abhyankar ajj = aj + ai[i]; 85267877ebaSShri Abhyankar rnz = ai[i + 1] - ai[i]; 85367877ebaSShri Abhyankar for (k = 0; k < rnz; k++) { 85467877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 85567877ebaSShri Abhyankar for (m = 0; m < bs; m++) { 8569566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 8579566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 858a6053eceSJunchao Zhang idx++; 85967877ebaSShri Abhyankar } 86067877ebaSShri Abhyankar } 86167877ebaSShri Abhyankar } 86267877ebaSShri Abhyankar } 863a6053eceSJunchao Zhang mumps->irn = row; 864a6053eceSJunchao Zhang mumps->jcn = col; 865a6053eceSJunchao Zhang mumps->nnz = nz; 86650c845baSStefano Zampini mumps->val = aa->a; 867127cd276SPierre Jolivet } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 868127cd276SPierre Jolivet else mumps->val = 
aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 8693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 87067877ebaSShri Abhyankar } 87167877ebaSShri Abhyankar 87266976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 873d71ae5a4SJacob Faibussowitsch { 87475480915SPierre Jolivet const PetscInt *ai, *aj, *ajj; 875a6053eceSJunchao Zhang PetscInt bs; 8766497c311SBarry Smith PetscCount nz, rnz, i, j, k, m; 877a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 87875480915SPierre Jolivet PetscScalar *val; 87916ebf90aSShri Abhyankar Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 88075480915SPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 88138548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 882b94d7dedSBarry Smith PetscBool isset, hermitian; 88338548759SBarry Smith #endif 88416ebf90aSShri Abhyankar 88516ebf90aSShri Abhyankar PetscFunctionBegin; 88638548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 887b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 888b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 88938548759SBarry Smith #endif 8902205254eSKarl Rupp ai = aa->i; 8912205254eSKarl Rupp aj = aa->j; 8929566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 89375480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 8946497c311SBarry Smith const PetscCount alloc_size = aa->nz * bs2; 895f3fa974cSJacob Faibussowitsch 896f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 897a6053eceSJunchao Zhang if (bs > 1) { 898f3fa974cSJacob Faibussowitsch PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 899a6053eceSJunchao Zhang mumps->val = mumps->val_alloc; 90075480915SPierre Jolivet } else { 
901a6053eceSJunchao Zhang mumps->val = aa->a; 90275480915SPierre Jolivet } 903a6053eceSJunchao Zhang mumps->irn = row; 904a6053eceSJunchao Zhang mumps->jcn = col; 905a6053eceSJunchao Zhang } else { 906a6053eceSJunchao Zhang row = mumps->irn; 907a6053eceSJunchao Zhang col = mumps->jcn; 908a6053eceSJunchao Zhang } 909a6053eceSJunchao Zhang val = mumps->val; 910185f6596SHong Zhang 91116ebf90aSShri Abhyankar nz = 0; 912a81fe166SPierre Jolivet if (bs > 1) { 91375480915SPierre Jolivet for (i = 0; i < mbs; i++) { 91416ebf90aSShri Abhyankar rnz = ai[i + 1] - ai[i]; 91567877ebaSShri Abhyankar ajj = aj + ai[i]; 91675480915SPierre Jolivet for (j = 0; j < rnz; j++) { 91775480915SPierre Jolivet for (k = 0; k < bs; k++) { 91875480915SPierre Jolivet for (m = 0; m < bs; m++) { 919ec4f40fdSPierre Jolivet if (ajj[j] > i || k >= m) { 92075480915SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 9219566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 9229566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 92375480915SPierre Jolivet } 92475480915SPierre Jolivet val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 92575480915SPierre Jolivet } 92675480915SPierre Jolivet } 92775480915SPierre Jolivet } 92875480915SPierre Jolivet } 92975480915SPierre Jolivet } 930a81fe166SPierre Jolivet } else if (reuse == MAT_INITIAL_MATRIX) { 931a81fe166SPierre Jolivet for (i = 0; i < mbs; i++) { 932a81fe166SPierre Jolivet rnz = ai[i + 1] - ai[i]; 933a81fe166SPierre Jolivet ajj = aj + ai[i]; 934a81fe166SPierre Jolivet for (j = 0; j < rnz; j++) { 9359566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 9369566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 937a6053eceSJunchao Zhang nz++; 938a81fe166SPierre Jolivet } 939a81fe166SPierre Jolivet } 9406497c311SBarry Smith PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" 
PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz); 941127cd276SPierre Jolivet } else if (mumps->nest_vals) 942127cd276SPierre Jolivet PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 943127cd276SPierre Jolivet else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 944a6053eceSJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 9453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 94616ebf90aSShri Abhyankar } 94716ebf90aSShri Abhyankar 94866976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 949d71ae5a4SJacob Faibussowitsch { 95067877ebaSShri Abhyankar const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 9516497c311SBarry Smith PetscCount nz, rnz, i, j; 95267877ebaSShri Abhyankar const PetscScalar *av, *v1; 95316ebf90aSShri Abhyankar PetscScalar *val; 954a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 955829b1710SHong Zhang Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 95629b521d4Sstefano_zampini PetscBool missing; 95738548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 958b94d7dedSBarry Smith PetscBool hermitian, isset; 95938548759SBarry Smith #endif 96016ebf90aSShri Abhyankar 96116ebf90aSShri Abhyankar PetscFunctionBegin; 96238548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 963b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 964b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 96538548759SBarry Smith #endif 9669566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 9679371c9d4SSatish Balay ai = aa->i; 9689371c9d4SSatish Balay aj = aa->j; 
96916ebf90aSShri Abhyankar adiag = aa->diag; 9709566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 971bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 9727ee00b23SStefano Zampini /* count nz in the upper triangular part of A */ 973829b1710SHong Zhang nz = 0; 97429b521d4Sstefano_zampini if (missing) { 97529b521d4Sstefano_zampini for (i = 0; i < M; i++) { 97629b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 97729b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 97829b521d4Sstefano_zampini if (aj[j] < i) continue; 97929b521d4Sstefano_zampini nz++; 98029b521d4Sstefano_zampini } 98129b521d4Sstefano_zampini } else { 98229b521d4Sstefano_zampini nz += ai[i + 1] - adiag[i]; 98329b521d4Sstefano_zampini } 98429b521d4Sstefano_zampini } 98529b521d4Sstefano_zampini } else { 986829b1710SHong Zhang for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 98729b521d4Sstefano_zampini } 9889566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 9899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 990a6053eceSJunchao Zhang mumps->nnz = nz; 991a6053eceSJunchao Zhang mumps->irn = row; 992a6053eceSJunchao Zhang mumps->jcn = col; 993a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 994185f6596SHong Zhang 99516ebf90aSShri Abhyankar nz = 0; 99629b521d4Sstefano_zampini if (missing) { 99729b521d4Sstefano_zampini for (i = 0; i < M; i++) { 99829b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 99929b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 100029b521d4Sstefano_zampini if (aj[j] < i) continue; 10019566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 10029566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 100329b521d4Sstefano_zampini val[nz] = av[j]; 100429b521d4Sstefano_zampini nz++; 100529b521d4Sstefano_zampini } 100629b521d4Sstefano_zampini } else { 100729b521d4Sstefano_zampini rnz = 
ai[i + 1] - adiag[i]; 100829b521d4Sstefano_zampini ajj = aj + adiag[i]; 100929b521d4Sstefano_zampini v1 = av + adiag[i]; 101029b521d4Sstefano_zampini for (j = 0; j < rnz; j++) { 10119566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 10129566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1013a6053eceSJunchao Zhang val[nz++] = v1[j]; 101429b521d4Sstefano_zampini } 101529b521d4Sstefano_zampini } 101629b521d4Sstefano_zampini } 101729b521d4Sstefano_zampini } else { 101816ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 101916ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 102067877ebaSShri Abhyankar ajj = aj + adiag[i]; 1021cf3759fdSShri Abhyankar v1 = av + adiag[i]; 102267877ebaSShri Abhyankar for (j = 0; j < rnz; j++) { 10239566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 10249566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1025a6053eceSJunchao Zhang val[nz++] = v1[j]; 102616ebf90aSShri Abhyankar } 102716ebf90aSShri Abhyankar } 102829b521d4Sstefano_zampini } 1029397b6df1SKris Buschelman } else { 1030a6053eceSJunchao Zhang nz = 0; 1031a6053eceSJunchao Zhang val = mumps->val; 103229b521d4Sstefano_zampini if (missing) { 103316ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 103429b521d4Sstefano_zampini if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 103529b521d4Sstefano_zampini for (j = ai[i]; j < ai[i + 1]; j++) { 103629b521d4Sstefano_zampini if (aj[j] < i) continue; 103729b521d4Sstefano_zampini val[nz++] = av[j]; 103829b521d4Sstefano_zampini } 103929b521d4Sstefano_zampini } else { 104016ebf90aSShri Abhyankar rnz = ai[i + 1] - adiag[i]; 104167877ebaSShri Abhyankar v1 = av + adiag[i]; 1042ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 104316ebf90aSShri Abhyankar } 104416ebf90aSShri Abhyankar } 104529b521d4Sstefano_zampini } else { 104616ebf90aSShri Abhyankar for (i = 0; i < M; i++) { 104716ebf90aSShri Abhyankar rnz = ai[i 
+ 1] - adiag[i]; 104816ebf90aSShri Abhyankar v1 = av + adiag[i]; 1049ad540459SPierre Jolivet for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 105016ebf90aSShri Abhyankar } 105116ebf90aSShri Abhyankar } 105229b521d4Sstefano_zampini } 10539566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 10543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 105516ebf90aSShri Abhyankar } 105616ebf90aSShri Abhyankar 105766976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1058d71ae5a4SJacob Faibussowitsch { 1059a6053eceSJunchao Zhang const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 1060a6053eceSJunchao Zhang PetscInt bs; 10616497c311SBarry Smith PetscCount rstart, nz, i, j, k, m, jj, irow, countA, countB; 1062a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 106316ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 106416ebf90aSShri Abhyankar PetscScalar *val; 1065397b6df1SKris Buschelman Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 1066f4f49eeaSPierre Jolivet Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data; 1067f4f49eeaSPierre Jolivet Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 1068ec4f40fdSPierre Jolivet const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 106938548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 1070b94d7dedSBarry Smith PetscBool hermitian, isset; 107138548759SBarry Smith #endif 107216ebf90aSShri Abhyankar 107316ebf90aSShri Abhyankar PetscFunctionBegin; 107438548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 1075b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1076b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 107738548759SBarry Smith #endif 10789566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 107938548759SBarry Smith rstart = A->rmap->rstart; 108038548759SBarry 
Smith ai = aa->i; 108138548759SBarry Smith aj = aa->j; 108238548759SBarry Smith bi = bb->i; 108338548759SBarry Smith bj = bb->j; 108438548759SBarry Smith av = aa->a; 108538548759SBarry Smith bv = bb->a; 1086397b6df1SKris Buschelman 10872205254eSKarl Rupp garray = mat->garray; 10882205254eSKarl Rupp 1089bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 1090a6053eceSJunchao Zhang nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 10919566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 10929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 1093a6053eceSJunchao Zhang /* can not decide the exact mumps->nnz now because of the SBAIJ */ 1094a6053eceSJunchao Zhang mumps->irn = row; 1095a6053eceSJunchao Zhang mumps->jcn = col; 1096a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 1097397b6df1SKris Buschelman } else { 1098a6053eceSJunchao Zhang val = mumps->val; 1099397b6df1SKris Buschelman } 1100397b6df1SKris Buschelman 11019371c9d4SSatish Balay jj = 0; 11029371c9d4SSatish Balay irow = rstart; 1103ec4f40fdSPierre Jolivet for (i = 0; i < mbs; i++) { 1104397b6df1SKris Buschelman ajj = aj + ai[i]; /* ptr to the beginning of this row */ 1105397b6df1SKris Buschelman countA = ai[i + 1] - ai[i]; 1106397b6df1SKris Buschelman countB = bi[i + 1] - bi[i]; 1107397b6df1SKris Buschelman bjj = bj + bi[i]; 1108ec4f40fdSPierre Jolivet v1 = av + ai[i] * bs2; 1109ec4f40fdSPierre Jolivet v2 = bv + bi[i] * bs2; 1110397b6df1SKris Buschelman 1111ec4f40fdSPierre Jolivet if (bs > 1) { 1112ec4f40fdSPierre Jolivet /* A-part */ 1113ec4f40fdSPierre Jolivet for (j = 0; j < countA; j++) { 1114ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 1115ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 1116ec4f40fdSPierre Jolivet if (rstart + ajj[j] * bs > irow || k >= m) { 1117ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 11189566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 
11199566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 1120ec4f40fdSPierre Jolivet } 1121ec4f40fdSPierre Jolivet val[jj++] = v1[j * bs2 + m + k * bs]; 1122ec4f40fdSPierre Jolivet } 1123ec4f40fdSPierre Jolivet } 1124ec4f40fdSPierre Jolivet } 1125ec4f40fdSPierre Jolivet } 1126ec4f40fdSPierre Jolivet 1127ec4f40fdSPierre Jolivet /* B-part */ 1128ec4f40fdSPierre Jolivet for (j = 0; j < countB; j++) { 1129ec4f40fdSPierre Jolivet for (k = 0; k < bs; k++) { 1130ec4f40fdSPierre Jolivet for (m = 0; m < bs; m++) { 1131ec4f40fdSPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 11329566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 11339566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 1134ec4f40fdSPierre Jolivet } 1135ec4f40fdSPierre Jolivet val[jj++] = v2[j * bs2 + m + k * bs]; 1136ec4f40fdSPierre Jolivet } 1137ec4f40fdSPierre Jolivet } 1138ec4f40fdSPierre Jolivet } 1139ec4f40fdSPierre Jolivet } else { 1140397b6df1SKris Buschelman /* A-part */ 1141397b6df1SKris Buschelman for (j = 0; j < countA; j++) { 1142bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 11439566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 11449566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 1145397b6df1SKris Buschelman } 114616ebf90aSShri Abhyankar val[jj++] = v1[j]; 1147397b6df1SKris Buschelman } 114816ebf90aSShri Abhyankar 114916ebf90aSShri Abhyankar /* B-part */ 115016ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 1151bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 11529566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 11539566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1154397b6df1SKris Buschelman } 115516ebf90aSShri Abhyankar val[jj++] = v2[j]; 115616ebf90aSShri Abhyankar } 
115716ebf90aSShri Abhyankar } 1158ec4f40fdSPierre Jolivet irow += bs; 1159ec4f40fdSPierre Jolivet } 11605d955bbbSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 11613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 116216ebf90aSShri Abhyankar } 116316ebf90aSShri Abhyankar 116466976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1165d71ae5a4SJacob Faibussowitsch { 116616ebf90aSShri Abhyankar const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 11676497c311SBarry Smith PetscCount rstart, cstart, nz, i, j, jj, irow, countA, countB; 1168a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 116916ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 117016ebf90aSShri Abhyankar PetscScalar *val; 1171a3d589ffSStefano Zampini Mat Ad, Ao; 1172a3d589ffSStefano Zampini Mat_SeqAIJ *aa; 1173a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 117416ebf90aSShri Abhyankar 117516ebf90aSShri Abhyankar PetscFunctionBegin; 11769566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 11779566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 11789566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1179a3d589ffSStefano Zampini 118057508eceSPierre Jolivet aa = (Mat_SeqAIJ *)Ad->data; 118157508eceSPierre Jolivet bb = (Mat_SeqAIJ *)Ao->data; 118238548759SBarry Smith ai = aa->i; 118338548759SBarry Smith aj = aa->j; 118438548759SBarry Smith bi = bb->i; 118538548759SBarry Smith bj = bb->j; 118616ebf90aSShri Abhyankar 1187a3d589ffSStefano Zampini rstart = A->rmap->rstart; 11885d955bbbSStefano Zampini cstart = A->cmap->rstart; 11892205254eSKarl Rupp 1190bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 11916497c311SBarry Smith nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 11929566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 
11939566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 1194a6053eceSJunchao Zhang mumps->nnz = nz; 1195a6053eceSJunchao Zhang mumps->irn = row; 1196a6053eceSJunchao Zhang mumps->jcn = col; 1197a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 119816ebf90aSShri Abhyankar } else { 1199a6053eceSJunchao Zhang val = mumps->val; 120016ebf90aSShri Abhyankar } 120116ebf90aSShri Abhyankar 12029371c9d4SSatish Balay jj = 0; 12039371c9d4SSatish Balay irow = rstart; 120416ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 120516ebf90aSShri Abhyankar ajj = aj + ai[i]; /* ptr to the beginning of this row */ 120616ebf90aSShri Abhyankar countA = ai[i + 1] - ai[i]; 120716ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 120816ebf90aSShri Abhyankar bjj = bj + bi[i]; 120916ebf90aSShri Abhyankar v1 = av + ai[i]; 121016ebf90aSShri Abhyankar v2 = bv + bi[i]; 121116ebf90aSShri Abhyankar 121216ebf90aSShri Abhyankar /* A-part */ 121316ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 1214bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 12159566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 12165d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 121716ebf90aSShri Abhyankar } 121816ebf90aSShri Abhyankar val[jj++] = v1[j]; 121916ebf90aSShri Abhyankar } 122016ebf90aSShri Abhyankar 122116ebf90aSShri Abhyankar /* B-part */ 122216ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 1223bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 12249566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 12259566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 122616ebf90aSShri Abhyankar } 122716ebf90aSShri Abhyankar val[jj++] = v2[j]; 122816ebf90aSShri Abhyankar } 122916ebf90aSShri Abhyankar irow++; 123016ebf90aSShri Abhyankar } 12319566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 12329566063dSJacob 
Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 12333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 123416ebf90aSShri Abhyankar } 123516ebf90aSShri Abhyankar 123666976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1237d71ae5a4SJacob Faibussowitsch { 123867877ebaSShri Abhyankar Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 1239f4f49eeaSPierre Jolivet Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)mat->A->data; 1240f4f49eeaSPierre Jolivet Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 124167877ebaSShri Abhyankar const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 12425d955bbbSStefano Zampini const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 124333d57670SJed Brown const PetscInt bs2 = mat->bs2; 1244a6053eceSJunchao Zhang PetscInt bs; 12456497c311SBarry Smith PetscCount nz, i, j, k, n, jj, irow, countA, countB, idx; 1246a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 124767877ebaSShri Abhyankar const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 124867877ebaSShri Abhyankar PetscScalar *val; 124967877ebaSShri Abhyankar 125067877ebaSShri Abhyankar PetscFunctionBegin; 12519566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 1252bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 125367877ebaSShri Abhyankar nz = bs2 * (aa->nz + bb->nz); 12549566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 12559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 1256a6053eceSJunchao Zhang mumps->nnz = nz; 1257a6053eceSJunchao Zhang mumps->irn = row; 1258a6053eceSJunchao Zhang mumps->jcn = col; 1259a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 126067877ebaSShri Abhyankar } else { 1261a6053eceSJunchao Zhang val = mumps->val; 126267877ebaSShri Abhyankar } 126367877ebaSShri Abhyankar 12649371c9d4SSatish Balay jj = 0; 
12659371c9d4SSatish Balay irow = rstart; 126667877ebaSShri Abhyankar for (i = 0; i < mbs; i++) { 126767877ebaSShri Abhyankar countA = ai[i + 1] - ai[i]; 126867877ebaSShri Abhyankar countB = bi[i + 1] - bi[i]; 126967877ebaSShri Abhyankar ajj = aj + ai[i]; 127067877ebaSShri Abhyankar bjj = bj + bi[i]; 127167877ebaSShri Abhyankar v1 = av + bs2 * ai[i]; 127267877ebaSShri Abhyankar v2 = bv + bs2 * bi[i]; 127367877ebaSShri Abhyankar 127467877ebaSShri Abhyankar idx = 0; 127567877ebaSShri Abhyankar /* A-part */ 127667877ebaSShri Abhyankar for (k = 0; k < countA; k++) { 127767877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 127867877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 1279bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 12809566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 12815d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 128267877ebaSShri Abhyankar } 128367877ebaSShri Abhyankar val[jj++] = v1[idx++]; 128467877ebaSShri Abhyankar } 128567877ebaSShri Abhyankar } 128667877ebaSShri Abhyankar } 128767877ebaSShri Abhyankar 128867877ebaSShri Abhyankar idx = 0; 128967877ebaSShri Abhyankar /* B-part */ 129067877ebaSShri Abhyankar for (k = 0; k < countB; k++) { 129167877ebaSShri Abhyankar for (j = 0; j < bs; j++) { 129267877ebaSShri Abhyankar for (n = 0; n < bs; n++) { 1293bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 12949566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 12959566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 129667877ebaSShri Abhyankar } 1297d985c460SShri Abhyankar val[jj++] = v2[idx++]; 129867877ebaSShri Abhyankar } 129967877ebaSShri Abhyankar } 130067877ebaSShri Abhyankar } 1301d985c460SShri Abhyankar irow += bs; 130267877ebaSShri Abhyankar } 13033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 130467877ebaSShri Abhyankar } 
130567877ebaSShri Abhyankar 130666976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1307d71ae5a4SJacob Faibussowitsch { 130816ebf90aSShri Abhyankar const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 13096497c311SBarry Smith PetscCount rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 1310a6053eceSJunchao Zhang PetscMUMPSInt *row, *col; 131116ebf90aSShri Abhyankar const PetscScalar *av, *bv, *v1, *v2; 131216ebf90aSShri Abhyankar PetscScalar *val; 1313a3d589ffSStefano Zampini Mat Ad, Ao; 1314a3d589ffSStefano Zampini Mat_SeqAIJ *aa; 1315a3d589ffSStefano Zampini Mat_SeqAIJ *bb; 131638548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 1317b94d7dedSBarry Smith PetscBool hermitian, isset; 131838548759SBarry Smith #endif 131916ebf90aSShri Abhyankar 132016ebf90aSShri Abhyankar PetscFunctionBegin; 132138548759SBarry Smith #if defined(PETSC_USE_COMPLEX) 1322b94d7dedSBarry Smith PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1323b94d7dedSBarry Smith PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 132438548759SBarry Smith #endif 13259566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 13269566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 13279566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1328a3d589ffSStefano Zampini 132957508eceSPierre Jolivet aa = (Mat_SeqAIJ *)Ad->data; 133057508eceSPierre Jolivet bb = (Mat_SeqAIJ *)Ao->data; 133138548759SBarry Smith ai = aa->i; 133238548759SBarry Smith aj = aa->j; 133338548759SBarry Smith adiag = aa->diag; 133438548759SBarry Smith bi = bb->i; 133538548759SBarry Smith bj = bb->j; 13362205254eSKarl Rupp 133716ebf90aSShri Abhyankar rstart = A->rmap->rstart; 133816ebf90aSShri Abhyankar 1339bccb9932SShri Abhyankar if (reuse == 
MAT_INITIAL_MATRIX) { 1340e0bace9bSHong Zhang nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 1341e0bace9bSHong Zhang nzb = 0; /* num of upper triangular entries in mat->B */ 134216ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 1343e0bace9bSHong Zhang nza += (ai[i + 1] - adiag[i]); 134416ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 134516ebf90aSShri Abhyankar bjj = bj + bi[i]; 1346e0bace9bSHong Zhang for (j = 0; j < countB; j++) { 1347e0bace9bSHong Zhang if (garray[bjj[j]] > rstart) nzb++; 1348e0bace9bSHong Zhang } 1349e0bace9bSHong Zhang } 135016ebf90aSShri Abhyankar 1351e0bace9bSHong Zhang nz = nza + nzb; /* total nz of upper triangular part of mat */ 13529566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &row, nz, &col)); 13539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &val)); 1354a6053eceSJunchao Zhang mumps->nnz = nz; 1355a6053eceSJunchao Zhang mumps->irn = row; 1356a6053eceSJunchao Zhang mumps->jcn = col; 1357a6053eceSJunchao Zhang mumps->val = mumps->val_alloc = val; 135816ebf90aSShri Abhyankar } else { 1359a6053eceSJunchao Zhang val = mumps->val; 136016ebf90aSShri Abhyankar } 136116ebf90aSShri Abhyankar 13629371c9d4SSatish Balay jj = 0; 13639371c9d4SSatish Balay irow = rstart; 136416ebf90aSShri Abhyankar for (i = 0; i < m; i++) { 136516ebf90aSShri Abhyankar ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 136616ebf90aSShri Abhyankar v1 = av + adiag[i]; 136716ebf90aSShri Abhyankar countA = ai[i + 1] - adiag[i]; 136816ebf90aSShri Abhyankar countB = bi[i + 1] - bi[i]; 136916ebf90aSShri Abhyankar bjj = bj + bi[i]; 137016ebf90aSShri Abhyankar v2 = bv + bi[i]; 137116ebf90aSShri Abhyankar 137216ebf90aSShri Abhyankar /* A-part */ 137316ebf90aSShri Abhyankar for (j = 0; j < countA; j++) { 1374bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 13759566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 13769566063dSJacob Faibussowitsch 
PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 137716ebf90aSShri Abhyankar } 137816ebf90aSShri Abhyankar val[jj++] = v1[j]; 137916ebf90aSShri Abhyankar } 138016ebf90aSShri Abhyankar 138116ebf90aSShri Abhyankar /* B-part */ 138216ebf90aSShri Abhyankar for (j = 0; j < countB; j++) { 138316ebf90aSShri Abhyankar if (garray[bjj[j]] > rstart) { 1384bccb9932SShri Abhyankar if (reuse == MAT_INITIAL_MATRIX) { 13859566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 13869566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 138716ebf90aSShri Abhyankar } 138816ebf90aSShri Abhyankar val[jj++] = v2[j]; 138916ebf90aSShri Abhyankar } 1390397b6df1SKris Buschelman } 1391397b6df1SKris Buschelman irow++; 1392397b6df1SKris Buschelman } 13939566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 13949566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 13953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1396397b6df1SKris Buschelman } 1397397b6df1SKris Buschelman 1398d2a308c1SPierre Jolivet static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1399c3e1b152SPierre Jolivet { 1400c3e1b152SPierre Jolivet const PetscScalar *av; 1401c3e1b152SPierre Jolivet const PetscInt M = A->rmap->n; 14026497c311SBarry Smith PetscCount i; 1403c3e1b152SPierre Jolivet PetscMUMPSInt *row, *col; 1404c3e1b152SPierre Jolivet Vec v; 1405c3e1b152SPierre Jolivet 1406c3e1b152SPierre Jolivet PetscFunctionBegin; 1407c3e1b152SPierre Jolivet PetscCall(MatDiagonalGetDiagonal(A, &v)); 1408c3e1b152SPierre Jolivet PetscCall(VecGetArrayRead(v, &av)); 1409c3e1b152SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 1410c3e1b152SPierre Jolivet PetscCall(PetscMalloc2(M, &row, M, &col)); 1411c3e1b152SPierre Jolivet for (i = 0; i < M; i++) { 1412c3e1b152SPierre Jolivet PetscCall(PetscMUMPSIntCast(i + 
A->rmap->rstart, &row[i])); 1413c3e1b152SPierre Jolivet col[i] = row[i]; 1414c3e1b152SPierre Jolivet } 1415c3e1b152SPierre Jolivet mumps->val = (PetscScalar *)av; 1416c3e1b152SPierre Jolivet mumps->irn = row; 1417c3e1b152SPierre Jolivet mumps->jcn = col; 1418c3e1b152SPierre Jolivet mumps->nnz = M; 1419127cd276SPierre Jolivet } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1420127cd276SPierre Jolivet else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1421c3e1b152SPierre Jolivet PetscCall(VecRestoreArrayRead(v, &av)); 1422c3e1b152SPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 1423c3e1b152SPierre Jolivet } 1424c3e1b152SPierre Jolivet 1425d2a308c1SPierre Jolivet static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 14264b9405b2SPierre Jolivet { 14274b9405b2SPierre Jolivet PetscScalar *v; 14284b9405b2SPierre Jolivet const PetscInt m = A->rmap->n, N = A->cmap->N; 14294b9405b2SPierre Jolivet PetscInt lda; 14306497c311SBarry Smith PetscCount i, j; 14314b9405b2SPierre Jolivet PetscMUMPSInt *row, *col; 14324b9405b2SPierre Jolivet 14334b9405b2SPierre Jolivet PetscFunctionBegin; 14344b9405b2SPierre Jolivet PetscCall(MatDenseGetArray(A, &v)); 14354b9405b2SPierre Jolivet PetscCall(MatDenseGetLDA(A, &lda)); 14364b9405b2SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 14374b9405b2SPierre Jolivet PetscCall(PetscMalloc2(m * N, &row, m * N, &col)); 14384b9405b2SPierre Jolivet for (i = 0; i < m; i++) { 14394b9405b2SPierre Jolivet col[i] = 0; 14404b9405b2SPierre Jolivet PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 14414b9405b2SPierre Jolivet } 14424b9405b2SPierre Jolivet for (j = 1; j < N; j++) { 14434b9405b2SPierre Jolivet for (i = 0; i < m; 
i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j)); 14444b9405b2SPierre Jolivet PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m)); 14454b9405b2SPierre Jolivet } 14464b9405b2SPierre Jolivet if (lda == m) mumps->val = v; 14474b9405b2SPierre Jolivet else { 14484b9405b2SPierre Jolivet PetscCall(PetscMalloc1(m * N, &mumps->val)); 14494b9405b2SPierre Jolivet mumps->val_alloc = mumps->val; 14504b9405b2SPierre Jolivet for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 14514b9405b2SPierre Jolivet } 14524b9405b2SPierre Jolivet mumps->irn = row; 14534b9405b2SPierre Jolivet mumps->jcn = col; 14544b9405b2SPierre Jolivet mumps->nnz = m * N; 14554b9405b2SPierre Jolivet } else { 14564b9405b2SPierre Jolivet if (lda == m && !mumps->nest_vals) mumps->val = v; 14574b9405b2SPierre Jolivet else { 14584b9405b2SPierre Jolivet for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 14594b9405b2SPierre Jolivet } 14604b9405b2SPierre Jolivet } 14614b9405b2SPierre Jolivet PetscCall(MatDenseRestoreArray(A, &v)); 14624b9405b2SPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 14634b9405b2SPierre Jolivet } 14644b9405b2SPierre Jolivet 146553587d93SPierre Jolivet // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a 146653587d93SPierre Jolivet // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated 146753587d93SPierre Jolivet // and its rows and columns permuted 146853587d93SPierre Jolivet // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest() 146953587d93SPierre Jolivet static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap) 147053587d93SPierre Jolivet { 
147153587d93SPierre Jolivet Mat A; 147253587d93SPierre Jolivet PetscScalar s[2]; 147353587d93SPierre Jolivet PetscBool isTrans, isHTrans, compare; 147453587d93SPierre Jolivet 147553587d93SPierre Jolivet PetscFunctionBegin; 147653587d93SPierre Jolivet do { 147753587d93SPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans)); 147853587d93SPierre Jolivet if (isTrans) { 147953587d93SPierre Jolivet PetscCall(MatTransposeGetMat(*sub, &A)); 148053587d93SPierre Jolivet isHTrans = PETSC_FALSE; 148153587d93SPierre Jolivet } else { 148253587d93SPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 148353587d93SPierre Jolivet if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A)); 148453587d93SPierre Jolivet } 148553587d93SPierre Jolivet compare = (PetscBool)(isTrans || isHTrans); 148653587d93SPierre Jolivet if (compare) { 148753587d93SPierre Jolivet if (vshift && vscale) { 148853587d93SPierre Jolivet PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 148953587d93SPierre Jolivet if (!*conjugate) { 149053587d93SPierre Jolivet *vshift += s[0] * *vscale; 149153587d93SPierre Jolivet *vscale *= s[1]; 149253587d93SPierre Jolivet } else { 149353587d93SPierre Jolivet *vshift += PetscConj(s[0]) * *vscale; 149453587d93SPierre Jolivet *vscale *= PetscConj(s[1]); 149553587d93SPierre Jolivet } 149653587d93SPierre Jolivet } 149753587d93SPierre Jolivet if (swap) *swap = (PetscBool)!*swap; 149853587d93SPierre Jolivet if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate; 149953587d93SPierre Jolivet *sub = A; 150053587d93SPierre Jolivet } 150153587d93SPierre Jolivet } while (compare); 150253587d93SPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 150353587d93SPierre Jolivet } 150453587d93SPierre 
Jolivet 150566976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 15069d0448ceSStefano Zampini { 15079d0448ceSStefano Zampini Mat **mats; 15089d0448ceSStefano Zampini PetscInt nr, nc; 15099d0448ceSStefano Zampini PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 15109d0448ceSStefano Zampini 15119d0448ceSStefano Zampini PetscFunctionBegin; 15129d0448ceSStefano Zampini PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 15139d0448ceSStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 15149d0448ceSStefano Zampini PetscMUMPSInt *irns, *jcns; 15159d0448ceSStefano Zampini PetscScalar *vals; 15166497c311SBarry Smith PetscCount totnnz, cumnnz, maxnnz; 151793d70b8aSPierre Jolivet PetscInt *pjcns_w, Mbs = 0; 15189d0448ceSStefano Zampini IS *rows, *cols; 15199d0448ceSStefano Zampini PetscInt **rows_idx, **cols_idx; 15209d0448ceSStefano Zampini 15219d0448ceSStefano Zampini cumnnz = 0; 15229d0448ceSStefano Zampini maxnnz = 0; 15235d955bbbSStefano Zampini PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 15249d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 15259d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 15269d0448ceSStefano Zampini Mat sub = mats[r][c]; 15279d0448ceSStefano Zampini 15289d0448ceSStefano Zampini mumps->nest_convert_to_triples[r * nc + c] = NULL; 15299d0448ceSStefano Zampini if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 15309d0448ceSStefano Zampini if (sub) { 15319d0448ceSStefano Zampini PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 153253587d93SPierre Jolivet PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 15339d0448ceSStefano Zampini MatInfo info; 15349d0448ceSStefano Zampini 153553587d93SPierre Jolivet PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 
15369d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 15379d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 15389d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 15399d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 15409d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 15419d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1542c3e1b152SPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 15434b9405b2SPierre Jolivet PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 15449d0448ceSStefano Zampini 15459d0448ceSStefano Zampini if (chol) { 15469d0448ceSStefano Zampini if (r == c) { 15479d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 15489d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 15499d0448ceSStefano Zampini else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 15509d0448ceSStefano Zampini else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1551c3e1b152SPierre Jolivet else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 15524b9405b2SPierre Jolivet else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 15539d0448ceSStefano Zampini } else { 15549d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 15559d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 15569d0448ceSStefano Zampini else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 15579d0448ceSStefano 
Zampini else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1558c3e1b152SPierre Jolivet else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 15594b9405b2SPierre Jolivet else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 15609d0448ceSStefano Zampini } 15619d0448ceSStefano Zampini } else { 15629d0448ceSStefano Zampini if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 15639d0448ceSStefano Zampini else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 15649d0448ceSStefano Zampini else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 15659d0448ceSStefano Zampini else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1566c3e1b152SPierre Jolivet else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 15674b9405b2SPierre Jolivet else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 15689d0448ceSStefano Zampini } 15699d0448ceSStefano Zampini PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 15709d0448ceSStefano Zampini mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 15719d0448ceSStefano Zampini PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 15726497c311SBarry Smith cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */ 15739d0448ceSStefano Zampini maxnnz = PetscMax(maxnnz, info.nz_used); 15749d0448ceSStefano Zampini } 15759d0448ceSStefano Zampini } 15769d0448ceSStefano Zampini } 15779d0448ceSStefano Zampini 15789d0448ceSStefano Zampini /* Allocate total COO */ 15799d0448ceSStefano Zampini totnnz = cumnnz; 15809d0448ceSStefano Zampini PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 15819d0448ceSStefano Zampini PetscCall(PetscMalloc1(totnnz, &vals)); 15829d0448ceSStefano Zampini 15839d0448ceSStefano Zampini /* Handle rows and column maps 15849d0448ceSStefano 
Zampini We directly map rows and use an SF for the columns */ 15859d0448ceSStefano Zampini PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 15869d0448ceSStefano Zampini PetscCall(MatNestGetISs(A, rows, cols)); 15879d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 15889d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 15899d0448ceSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 15905d955bbbSStefano Zampini else (void)maxnnz; 15919d0448ceSStefano Zampini 15929d0448ceSStefano Zampini cumnnz = 0; 15939d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 15949d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 15959d0448ceSStefano Zampini Mat sub = mats[r][c]; 15969d0448ceSStefano Zampini const PetscInt *ridx = rows_idx[r]; 15975d955bbbSStefano Zampini const PetscInt *cidx = cols_idx[c]; 159853587d93SPierre Jolivet PetscScalar vscale = 1.0, vshift = 0.0; 159993d70b8aSPierre Jolivet PetscInt rst, size, bs; 16009d0448ceSStefano Zampini PetscSF csf; 160153587d93SPierre Jolivet PetscBool conjugate = PETSC_FALSE, swap = PETSC_FALSE; 16025d955bbbSStefano Zampini PetscLayout cmap; 16036497c311SBarry Smith PetscInt innz; 16049d0448ceSStefano Zampini 16059d0448ceSStefano Zampini mumps->nest_vals_start[r * nc + c] = cumnnz; 160693d70b8aSPierre Jolivet if (c == r) { 160793d70b8aSPierre Jolivet PetscCall(ISGetSize(rows[r], &size)); 160893d70b8aSPierre Jolivet if (!mumps->nest_convert_to_triples[r * nc + c]) { 160993d70b8aSPierre Jolivet for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 161093d70b8aSPierre Jolivet } 161193d70b8aSPierre Jolivet PetscCall(MatGetBlockSize(sub, &bs)); 161293d70b8aSPierre Jolivet Mbs += size / bs; 161393d70b8aSPierre Jolivet } 
16149d0448ceSStefano Zampini if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 16159d0448ceSStefano Zampini 16165d955bbbSStefano Zampini /* Extract inner blocks if needed */ 161753587d93SPierre Jolivet PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap)); 161853587d93SPierre Jolivet PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 16195d955bbbSStefano Zampini 16205d955bbbSStefano Zampini /* Get column layout to map off-process columns */ 16215d955bbbSStefano Zampini PetscCall(MatGetLayouts(sub, NULL, &cmap)); 16225d955bbbSStefano Zampini 16235d955bbbSStefano Zampini /* Get row start to map on-process rows */ 16245d955bbbSStefano Zampini PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 16255d955bbbSStefano Zampini 16269d0448ceSStefano Zampini /* Directly use the mumps datastructure and use C ordering for now */ 16279d0448ceSStefano Zampini PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 16289d0448ceSStefano Zampini 16295d955bbbSStefano Zampini /* Swap the role of rows and columns indices for transposed blocks 16305d955bbbSStefano Zampini since we need values with global final ordering */ 16315d955bbbSStefano Zampini if (swap) { 16325d955bbbSStefano Zampini cidx = rows_idx[r]; 16335d955bbbSStefano Zampini ridx = cols_idx[c]; 16349d0448ceSStefano Zampini } 16359d0448ceSStefano Zampini 16365d955bbbSStefano Zampini /* Communicate column indices 16375d955bbbSStefano Zampini This could have been done with a single SF but it would have complicated the code a lot. 
16385d955bbbSStefano Zampini But since we do it only once, we pay the price of setting up an SF for each block */ 16395d955bbbSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) { 16405d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1641f4f49eeaSPierre Jolivet } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */ 16429d0448ceSStefano Zampini PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 16436497c311SBarry Smith PetscCall(PetscIntCast(mumps->nnz, &innz)); 16446497c311SBarry Smith PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w)); 16455d955bbbSStefano Zampini PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 16465d955bbbSStefano Zampini PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 16479d0448ceSStefano Zampini PetscCall(PetscSFDestroy(&csf)); 16489d0448ceSStefano Zampini 16495d955bbbSStefano Zampini /* Import indices: use direct map for rows and mapped indices for columns */ 16505d955bbbSStefano Zampini if (swap) { 16515d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) { 16525d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 16535d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 16545d955bbbSStefano Zampini } 16555d955bbbSStefano Zampini } else { 16565d955bbbSStefano Zampini for (PetscInt k = 0; k < mumps->nnz; k++) { 16575d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 16585d955bbbSStefano Zampini PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 16595d955bbbSStefano Zampini } 16605d955bbbSStefano Zampini } 16615d955bbbSStefano Zampini 16625d955bbbSStefano Zampini /* Import values to full COO */ 166353587d93SPierre Jolivet if (conjugate) { /* conjugate the entries 
*/ 166450c845baSStefano Zampini PetscScalar *v = vals + cumnnz; 166553587d93SPierre Jolivet for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]); 166653587d93SPierre Jolivet } else if (vscale != 1.0) { 166753587d93SPierre Jolivet PetscScalar *v = vals + cumnnz; 166853587d93SPierre Jolivet for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k]; 166953587d93SPierre Jolivet } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 16709d0448ceSStefano Zampini 16719d0448ceSStefano Zampini /* Shift new starting point and sanity check */ 16729d0448ceSStefano Zampini cumnnz += mumps->nnz; 16736497c311SBarry Smith PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 16749d0448ceSStefano Zampini 16759d0448ceSStefano Zampini /* Free scratch memory */ 16769d0448ceSStefano Zampini PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 16779d0448ceSStefano Zampini PetscCall(PetscFree(mumps->val_alloc)); 16789d0448ceSStefano Zampini mumps->val = NULL; 16799d0448ceSStefano Zampini mumps->nnz = 0; 16809d0448ceSStefano Zampini } 16819d0448ceSStefano Zampini } 168293d70b8aSPierre Jolivet if (mumps->id.ICNTL(15) == 1) { 168393d70b8aSPierre Jolivet if (Mbs != A->rmap->N) { 168493d70b8aSPierre Jolivet PetscMPIInt rank, size; 168593d70b8aSPierre Jolivet 168693d70b8aSPierre Jolivet PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 168793d70b8aSPierre Jolivet PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 168893d70b8aSPierre Jolivet if (rank == 0) { 168993d70b8aSPierre Jolivet PetscInt shift = 0; 169093d70b8aSPierre Jolivet 169193d70b8aSPierre Jolivet PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk)); 169293d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkptr)); 169393d70b8aSPierre Jolivet PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr)); 169493d70b8aSPierre Jolivet mumps->id.blkptr[0] = 
1; 169593d70b8aSPierre Jolivet for (PetscInt i = 0; i < size; ++i) { 169693d70b8aSPierre Jolivet for (PetscInt r = 0; r < nr; r++) { 169793d70b8aSPierre Jolivet Mat sub = mats[r][r]; 169893d70b8aSPierre Jolivet const PetscInt *ranges; 169993d70b8aSPierre Jolivet PetscInt bs; 170093d70b8aSPierre Jolivet 170193d70b8aSPierre Jolivet for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 170293d70b8aSPierre Jolivet PetscCall(MatGetOwnershipRanges(sub, &ranges)); 170393d70b8aSPierre Jolivet PetscCall(MatGetBlockSize(sub, &bs)); 170493d70b8aSPierre Jolivet for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1)); 170593d70b8aSPierre Jolivet shift += (ranges[i + 1] - ranges[i]) / bs; 170693d70b8aSPierre Jolivet } 170793d70b8aSPierre Jolivet } 170893d70b8aSPierre Jolivet } 170993d70b8aSPierre Jolivet } else mumps->id.ICNTL(15) = 0; 171093d70b8aSPierre Jolivet } 17119d0448ceSStefano Zampini if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 17129d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 17139d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 17149d0448ceSStefano Zampini PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 17156497c311SBarry Smith if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 17165d955bbbSStefano Zampini mumps->nest_vals_start[nr * nc] = cumnnz; 17179d0448ceSStefano Zampini 17189d0448ceSStefano Zampini /* Set pointers for final MUMPS data structure */ 17199d0448ceSStefano Zampini mumps->nest_vals = vals; 17209d0448ceSStefano Zampini mumps->val_alloc = NULL; /* do 
not use val_alloc since it may be reallocated with the OMP callpath */ 17219d0448ceSStefano Zampini mumps->val = vals; 17229d0448ceSStefano Zampini mumps->irn = irns; 17239d0448ceSStefano Zampini mumps->jcn = jcns; 17249d0448ceSStefano Zampini mumps->nnz = cumnnz; 17259d0448ceSStefano Zampini } else { 17269d0448ceSStefano Zampini PetscScalar *oval = mumps->nest_vals; 17279d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 17289d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 172953587d93SPierre Jolivet PetscBool conjugate = PETSC_FALSE; 17305d955bbbSStefano Zampini Mat sub = mats[r][c]; 173153587d93SPierre Jolivet PetscScalar vscale = 1.0, vshift = 0.0; 17325d955bbbSStefano Zampini PetscInt midx = r * nc + c; 17335d955bbbSStefano Zampini 17345d955bbbSStefano Zampini if (!mumps->nest_convert_to_triples[midx]) continue; 173553587d93SPierre Jolivet PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL)); 173653587d93SPierre Jolivet PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 17375d955bbbSStefano Zampini mumps->val = oval + mumps->nest_vals_start[midx]; 17385d955bbbSStefano Zampini PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 173953587d93SPierre Jolivet if (conjugate) { 17406497c311SBarry Smith PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 174153587d93SPierre Jolivet for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]); 174253587d93SPierre Jolivet } else if (vscale != 1.0) { 174353587d93SPierre Jolivet PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 174453587d93SPierre Jolivet for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale; 17455d955bbbSStefano Zampini } 17469d0448ceSStefano Zampini } 17479d0448ceSStefano Zampini } 17489d0448ceSStefano Zampini mumps->val = oval; 17499d0448ceSStefano Zampini } 
17509d0448ceSStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 17519d0448ceSStefano Zampini } 17529d0448ceSStefano Zampini 175366976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MUMPS(Mat A) 1754d71ae5a4SJacob Faibussowitsch { 1755a6053eceSJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1756b24902e0SBarry Smith 1757397b6df1SKris Buschelman PetscFunctionBegin; 1758cf053153SJunchao Zhang PetscCall(PetscFree(mumps->id.isol_loc)); 17599566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 17609566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 17619566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->b_seq)); 17629566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 17639566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.perm_in)); 176493d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkvar)); 176593d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkptr)); 17669566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 17679566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->val_alloc)); 17689566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->info)); 1769413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 1770413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 17719566063dSJacob Faibussowitsch PetscCall(MatMumpsResetSchur_Private(mumps)); 1772413bcc21SPierre Jolivet if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1773a5e57a09SHong Zhang mumps->id.job = JOB_END; 17743ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 17759261f6e4SBarry Smith PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1776413bcc21SPierre Jolivet if (mumps->mumps_comm != MPI_COMM_NULL) { 1777413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) 
PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1778413bcc21SPierre Jolivet else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1779413bcc21SPierre Jolivet } 1780413bcc21SPierre Jolivet } 1781cf053153SJunchao Zhang PetscCall(MatMumpsFreeInternalID(&mumps->id)); 17823ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 178367602552SJunchao Zhang if (mumps->use_petsc_omp_support) { 17849566063dSJacob Faibussowitsch PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 17859566063dSJacob Faibussowitsch PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 17869566063dSJacob Faibussowitsch PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 178767602552SJunchao Zhang } 17883ab56b82SJunchao Zhang #endif 17899566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ia_alloc)); 17909566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->ja_alloc)); 17919566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->recvcount)); 17929566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->reqs)); 17939566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->irhs_loc)); 17949d0448ceSStefano Zampini PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 17959d0448ceSStefano Zampini PetscCall(PetscFree(mumps->nest_vals)); 17969566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1797bf0cc555SLisandro Dalcin 179897969023SHong Zhang /* clear composed functions */ 17999566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 18009566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 18019566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 18029566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 18039566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 18049566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 18059566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 18069566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 18079566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 18089566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 18099566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 18105c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 18119566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 18129566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 181393d70b8aSPierre Jolivet PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL)); 18143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1815397b6df1SKris Buschelman } 1816397b6df1SKris Buschelman 181767602552SJunchao Zhang /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
*/ 1818d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1819d71ae5a4SJacob Faibussowitsch { 182067602552SJunchao Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 182167602552SJunchao Zhang const PetscMPIInt ompsize = mumps->omp_comm_size; 182267602552SJunchao Zhang PetscInt i, m, M, rstart; 182367602552SJunchao Zhang 182467602552SJunchao Zhang PetscFunctionBegin; 18259566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &M, NULL)); 18269566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &m, NULL)); 182708401ef6SPierre Jolivet PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 182867602552SJunchao Zhang if (ompsize == 1) { 182967602552SJunchao Zhang if (!mumps->irhs_loc) { 18306497c311SBarry Smith mumps->nloc_rhs = (PetscMUMPSInt)m; 18319566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 18329566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 18336497c311SBarry Smith for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */ 183467602552SJunchao Zhang } 1835cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc)); 183667602552SJunchao Zhang } else { 183767602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT) 183867602552SJunchao Zhang const PetscInt *ranges; 183967602552SJunchao Zhang PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 184067602552SJunchao Zhang MPI_Group petsc_group, omp_group; 184167602552SJunchao Zhang PetscScalar *recvbuf = NULL; 184267602552SJunchao Zhang 184367602552SJunchao Zhang if (mumps->is_omp_master) { 184467602552SJunchao Zhang /* Lazily initialize the omp stuff for distributed rhs */ 184567602552SJunchao Zhang if (!mumps->irhs_loc) { 18469566063dSJacob Faibussowitsch 
PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 18479566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 18489566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 18499566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 185067602552SJunchao Zhang for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 18519566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 185267602552SJunchao Zhang 185367602552SJunchao Zhang /* Populate mumps->irhs_loc[], rhs_nrow[] */ 185467602552SJunchao Zhang mumps->nloc_rhs = 0; 18559566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(A, &ranges)); 185667602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 185767602552SJunchao Zhang mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 185867602552SJunchao Zhang mumps->nloc_rhs += mumps->rhs_nrow[j]; 185967602552SJunchao Zhang } 18609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 186167602552SJunchao Zhang for (j = k = 0; j < ompsize; j++) { 1862407b358cSPierre Jolivet for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */ 186367602552SJunchao Zhang } 186467602552SJunchao Zhang 18659566063dSJacob Faibussowitsch PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 18669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&petsc_group)); 18679566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&omp_group)); 186867602552SJunchao Zhang } 186967602552SJunchao Zhang 187067602552SJunchao Zhang /* Realloc buffers when current nrhs is bigger than what we have met */ 187167602552SJunchao Zhang if (nrhs > mumps->max_nrhs) { 18729566063dSJacob Faibussowitsch 
PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 18739566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 187467602552SJunchao Zhang mumps->max_nrhs = nrhs; 187567602552SJunchao Zhang } 187667602552SJunchao Zhang 187767602552SJunchao Zhang /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 18789566063dSJacob Faibussowitsch for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 187967602552SJunchao Zhang mumps->rhs_disps[0] = 0; 188067602552SJunchao Zhang for (j = 1; j < ompsize; j++) { 188167602552SJunchao Zhang mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 188208401ef6SPierre Jolivet PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 188367602552SJunchao Zhang } 188467602552SJunchao Zhang recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. 
Single rhs is common in Ax=b */ 188567602552SJunchao Zhang } 188667602552SJunchao Zhang 18879566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 18889566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 188967602552SJunchao Zhang 189067602552SJunchao Zhang if (mumps->is_omp_master) { 189167602552SJunchao Zhang if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 189267602552SJunchao Zhang PetscScalar *dst, *dstbase = mumps->rhs_loc; 189367602552SJunchao Zhang for (j = 0; j < ompsize; j++) { 189467602552SJunchao Zhang const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 189567602552SJunchao Zhang dst = dstbase; 189667602552SJunchao Zhang for (i = 0; i < nrhs; i++) { 18979566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 189867602552SJunchao Zhang src += mumps->rhs_nrow[j]; 189967602552SJunchao Zhang dst += mumps->nloc_rhs; 190067602552SJunchao Zhang } 190167602552SJunchao Zhang dstbase += mumps->rhs_nrow[j]; 190267602552SJunchao Zhang } 190367602552SJunchao Zhang } 1904cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc)); 190567602552SJunchao Zhang } 190667602552SJunchao Zhang #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 190767602552SJunchao Zhang } 19086497c311SBarry Smith mumps->id.nrhs = (PetscMUMPSInt)nrhs; 19096497c311SBarry Smith mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs; 191067602552SJunchao Zhang mumps->id.lrhs_loc = mumps->nloc_rhs; 191167602552SJunchao Zhang mumps->id.irhs_loc = mumps->irhs_loc; 19123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 191367602552SJunchao Zhang } 191467602552SJunchao Zhang 191566976f2fSJacob Faibussowitsch static PetscErrorCode 
MatSolve_MUMPS(Mat A, Vec b, Vec x) 1916d71ae5a4SJacob Faibussowitsch { 1917e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1918cf053153SJunchao Zhang const PetscScalar *barray = NULL; 1919d54de34fSKris Buschelman PetscScalar *array; 1920329ec9b3SHong Zhang IS is_iden, is_petsc; 1921329ec9b3SHong Zhang PetscInt i; 1922cc86f929SStefano Zampini PetscBool second_solve = PETSC_FALSE; 1923883f2eb9SBarry Smith static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1924397b6df1SKris Buschelman 1925397b6df1SKris Buschelman PetscFunctionBegin; 19269371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 19279371c9d4SSatish Balay "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 19289371c9d4SSatish Balay &cite1)); 19299371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 19309371c9d4SSatish Balay "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 19319371c9d4SSatish Balay &cite2)); 19322aca8efcSHong Zhang 1933f480ea8aSBarry Smith PetscCall(VecFlag(x, A->factorerrortype)); 1934603e8f96SBarry Smith if (A->factorerrortype) { 19359566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 19363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19372aca8efcSHong Zhang } 19382aca8efcSHong Zhang 1939a5e57a09SHong Zhang mumps->id.nrhs = 1; 19402d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 194125aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 1942cf053153SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */ 1943cf053153SJunchao Zhang PetscCall(VecGetArrayRead(b, &barray)); 1944cf053153SJunchao Zhang PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray)); 194525aac85cSJunchao Zhang } else { 1946cf053153SJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/ 19479566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 19489566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 194967602552SJunchao Zhang if (!mumps->myid) { 19509566063dSJacob Faibussowitsch PetscCall(VecGetArray(mumps->b_seq, &array)); 1951cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 195267602552SJunchao Zhang } 195325aac85cSJunchao Zhang } 1954cf053153SJunchao Zhang } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need 
to bother with isol_loc[] to get the solution */ 1955cf053153SJunchao Zhang mumps->id.ICNTL(20) = 0; 19569566063dSJacob Faibussowitsch PetscCall(VecCopy(b, x)); 19579566063dSJacob Faibussowitsch PetscCall(VecGetArray(x, &array)); 1958cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 1959397b6df1SKris Buschelman } 1960397b6df1SKris Buschelman 1961cc86f929SStefano Zampini /* 1962cc86f929SStefano Zampini handle condensation step of Schur complement (if any) 1963cc86f929SStefano Zampini We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1964cc86f929SStefano Zampini According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1965cc86f929SStefano Zampini Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1966cc86f929SStefano Zampini This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1967cc86f929SStefano Zampini */ 19683e5b40d0SPierre Jolivet if (mumps->id.size_schur > 0) { 196908401ef6SPierre Jolivet PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 19703e5b40d0SPierre Jolivet if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1971cc86f929SStefano Zampini second_solve = PETSC_TRUE; 1972cf053153SJunchao Zhang PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs 19733e5b40d0SPierre Jolivet mumps->id.ICNTL(26) = 1; /* condensation phase */ 19743e5b40d0SPierre Jolivet } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1975cc86f929SStefano Zampini } 1976cf053153SJunchao Zhang 1977a5e57a09SHong Zhang mumps->id.job = JOB_SOLVE; 1978cf053153SJunchao Zhang PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs 1979cf053153SJunchao Zhang 
PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 1980397b6df1SKris Buschelman 1981b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 19821baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1983cf053153SJunchao Zhang else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side 19843e5b40d0SPierre Jolivet PetscCall(MatMumpsSolveSchur_Private(A)); 1985cf053153SJunchao Zhang for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i); 19863e5b40d0SPierre Jolivet } 1987b5fa320bSStefano Zampini 1988f0b74427SPierre Jolivet if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */ 1989a5e57a09SHong Zhang if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1990cf053153SJunchao Zhang /* when id.ICNTL(9) changes, the contents of ilsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 19919566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1992397b6df1SKris Buschelman } 1993a5e57a09SHong Zhang if (!mumps->scat_sol) { /* create scatter scat_sol */ 1994a6053eceSJunchao Zhang PetscInt *isol2_loc = NULL; 19959566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 19969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1997a6053eceSJunchao Zhang for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 19989566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 19999566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 20009566063dSJacob 
Faibussowitsch PetscCall(ISDestroy(&is_iden)); 20019566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_petsc)); 2002a5e57a09SHong Zhang mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 2003397b6df1SKris Buschelman } 2004a5e57a09SHong Zhang 2005cf053153SJunchao Zhang PetscScalar *xarray; 2006cf053153SJunchao Zhang PetscCall(VecGetArray(mumps->x_seq, &xarray)); 2007cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray)); 2008cf053153SJunchao Zhang PetscCall(VecRestoreArray(mumps->x_seq, &xarray)); 20099566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 20109566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 2011353d7d71SJunchao Zhang 2012cf053153SJunchao Zhang if (mumps->ICNTL20 == 10) { // distributed RHS 2013cf053153SJunchao Zhang PetscCall(VecRestoreArrayRead(b, &barray)); 2014cf053153SJunchao Zhang } else if (!mumps->myid) { // centralized RHS 20159566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mumps->b_seq, &array)); 201625aac85cSJunchao Zhang } 2017cf053153SJunchao Zhang } else { 2018cf053153SJunchao Zhang // id.rhs has the solution in mumps precision 2019cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array)); 2020cf053153SJunchao Zhang PetscCall(VecRestoreArray(x, &array)); 2021cf053153SJunchao Zhang } 2022353d7d71SJunchao Zhang 202364412097SPierre Jolivet PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 20243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2025397b6df1SKris Buschelman } 2026397b6df1SKris Buschelman 202766976f2fSJacob Faibussowitsch static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 2028d71ae5a4SJacob Faibussowitsch { 2029e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2030338d3105SPierre Jolivet const PetscMUMPSInt value = mumps->id.ICNTL(9); 203151d5961aSHong Zhang 203251d5961aSHong Zhang PetscFunctionBegin; 2033a5e57a09SHong Zhang mumps->id.ICNTL(9) = 0; 20349566063dSJacob Faibussowitsch PetscCall(MatSolve_MUMPS(A, b, x)); 2035338d3105SPierre Jolivet mumps->id.ICNTL(9) = value; 20363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 203751d5961aSHong Zhang } 203851d5961aSHong Zhang 203966976f2fSJacob Faibussowitsch static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 2040d71ae5a4SJacob Faibussowitsch { 2041b8491c3eSStefano Zampini Mat Bt = NULL; 2042a6053eceSJunchao Zhang PetscBool denseX, denseB, flg, flgT; 2043e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2044917c3dccSPierre Jolivet PetscInt i, nrhs, M, nrhsM; 20451683a169SBarry Smith PetscScalar *array; 2046cf053153SJunchao Zhang const PetscScalar *barray; 2047a6053eceSJunchao Zhang PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 2048a6053eceSJunchao Zhang PetscMUMPSInt *isol_loc, *isol_loc_save; 2049cf053153SJunchao Zhang PetscScalar *sol_loc; 2050cf053153SJunchao Zhang void *sol_loc_save; 2051cf053153SJunchao Zhang PetscCount sol_loc_len_save; 2052be818407SHong Zhang IS is_to, is_from; 2053beae5ec0SHong Zhang PetscInt k, proc, j, m, myrstart; 2054be818407SHong Zhang const PetscInt *rstart; 205567602552SJunchao Zhang Vec v_mpi, msol_loc; 205667602552SJunchao Zhang VecScatter scat_sol; 205767602552SJunchao Zhang Vec b_seq; 205867602552SJunchao Zhang VecScatter scat_rhs; 2059be818407SHong Zhang PetscScalar *aa; 2060be818407SHong Zhang PetscInt 
spnr, *ia, *ja; 2061d56c302dSHong Zhang Mat_MPIAIJ *b = NULL; 2062bda8bf91SBarry Smith 2063e0b74bf9SHong Zhang PetscFunctionBegin; 20649566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 206528b400f6SJacob Faibussowitsch PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 2066be818407SHong Zhang 20679566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 2068cf053153SJunchao Zhang 2069a6053eceSJunchao Zhang if (denseB) { 207008401ef6SPierre Jolivet PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 2071be818407SHong Zhang mumps->id.ICNTL(20) = 0; /* dense RHS */ 20720e6b8875SHong Zhang } else { /* sparse B */ 207308401ef6SPierre Jolivet PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 2074013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 207553587d93SPierre Jolivet PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 207653587d93SPierre Jolivet PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 207753587d93SPierre Jolivet /* input B is transpose of actual RHS matrix, 20780e6b8875SHong Zhang because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 20799566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(B, &Bt)); 2080be818407SHong Zhang mumps->id.ICNTL(20) = 1; /* sparse RHS */ 2081b8491c3eSStefano Zampini } 208287b22cf4SHong Zhang 20839566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &M, &nrhs)); 2084917c3dccSPierre Jolivet PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); 20856497c311SBarry Smith mumps->id.nrhs = (PetscMUMPSInt)nrhs; 20866497c311SBarry Smith mumps->id.lrhs = (PetscMUMPSInt)M; 20879481e6e9SHong Zhang 2088cf053153SJunchao Zhang if (mumps->petsc_size == 1) { // handle this easy case specially and return early 2089b8491c3eSStefano Zampini PetscScalar *aa; 2090b8491c3eSStefano Zampini PetscInt spnr, *ia, *ja; 2091e94cce23SStefano Zampini PetscBool second_solve = PETSC_FALSE; 2092b8491c3eSStefano Zampini 20939566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 2094a6053eceSJunchao Zhang if (denseB) { 20952b691707SHong Zhang /* copy B to X */ 2096cf053153SJunchao Zhang PetscCall(MatDenseGetArrayRead(B, &barray)); 2097cf053153SJunchao Zhang PetscCall(PetscArraycpy(array, barray, nrhsM)); 2098cf053153SJunchao Zhang PetscCall(MatDenseRestoreArrayRead(B, &barray)); 20992b691707SHong Zhang } else { /* sparse B */ 21009566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Bt, &aa)); 21019566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 210228b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 21039566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2104cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 2105b8491c3eSStefano Zampini } 2106cf053153SJunchao Zhang 
PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 2107cf053153SJunchao Zhang 2108e94cce23SStefano Zampini /* handle condensation step of Schur complement (if any) */ 21093e5b40d0SPierre Jolivet if (mumps->id.size_schur > 0) { 21103e5b40d0SPierre Jolivet if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 2111e94cce23SStefano Zampini second_solve = PETSC_TRUE; 2112cf053153SJunchao Zhang PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs 2113cf053153SJunchao Zhang mumps->id.ICNTL(26) = 1; /* condensation phase, i.e, to solve id.redrhs */ 21143e5b40d0SPierre Jolivet } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 2115e94cce23SStefano Zampini } 2116cf053153SJunchao Zhang 21172cd7d884SHong Zhang mumps->id.job = JOB_SOLVE; 21183ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 2119cf053153SJunchao Zhang PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2120b5fa320bSStefano Zampini 2121b5fa320bSStefano Zampini /* handle expansion step of Schur complement (if any) */ 21221baa6e33SBarry Smith if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 2123cf053153SJunchao Zhang else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side 21243e5b40d0SPierre Jolivet PetscCall(MatMumpsSolveSchur_Private(A)); 21253e5b40d0SPierre Jolivet for (j = 0; j < nrhs; ++j) 2126cf053153SJunchao Zhang for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs); 21273e5b40d0SPierre Jolivet } 2128cf053153SJunchao Zhang 2129cf053153SJunchao Zhang if (!denseB) { /* sparse B, restore ia, ja */ 21309566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 21319566063dSJacob Faibussowitsch 
PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 213228b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 2133b8491c3eSStefano Zampini } 2134cf053153SJunchao Zhang 2135cf053153SJunchao Zhang // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X. 2136cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array)); 21379566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 21383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2139be818407SHong Zhang } 2140801fbe65SHong Zhang 21412ef1f0ffSBarry Smith /* parallel case: MUMPS requires rhs B to be centralized on the host! */ 214250a7cd33SPierre Jolivet PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 2143241dbb5eSStefano Zampini 2144beae5ec0SHong Zhang /* create msol_loc to hold mumps local solution */ 2145cf053153SJunchao Zhang isol_loc_save = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */ 2146cf053153SJunchao Zhang sol_loc_save = mumps->id.sol_loc; 2147cf053153SJunchao Zhang sol_loc_len_save = mumps->id.sol_loc_len; 2148cf053153SJunchao Zhang mumps->id.isol_loc = NULL; // an init state 2149cf053153SJunchao Zhang mumps->id.sol_loc = NULL; 2150cf053153SJunchao Zhang mumps->id.sol_loc_len = 0; 2151801fbe65SHong Zhang 2152a1dfcbd9SJunchao Zhang lsol_loc = mumps->id.lsol_loc; 2153917c3dccSPierre Jolivet PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */ 21549566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 2155cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 
2156801fbe65SHong Zhang mumps->id.isol_loc = isol_loc; 2157801fbe65SHong Zhang 21589566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 21592cd7d884SHong Zhang 216067602552SJunchao Zhang if (denseB) { 216125aac85cSJunchao Zhang if (mumps->ICNTL20 == 10) { 216267602552SJunchao Zhang mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 2163cf053153SJunchao Zhang PetscCall(MatDenseGetArrayRead(B, &barray)); 2164cf053153SJunchao Zhang PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc 2165cf053153SJunchao Zhang PetscCall(MatDenseRestoreArrayRead(B, &barray)); 21669566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 2167cf053153SJunchao Zhang PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X 216825aac85cSJunchao Zhang } else { 216925aac85cSJunchao Zhang mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 217080577c12SJunchao Zhang /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 217180577c12SJunchao Zhang very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 217280577c12SJunchao Zhang 0, re-arrange B into desired order, which is a local operation. 
217380577c12SJunchao Zhang */ 217480577c12SJunchao Zhang 217567602552SJunchao Zhang /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 2176be818407SHong Zhang /* wrap dense rhs matrix B into a vector v_mpi */ 21779566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, NULL)); 2178cf053153SJunchao Zhang PetscCall(MatDenseGetArrayRead(B, &barray)); 2179cf053153SJunchao Zhang PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi)); 2180cf053153SJunchao Zhang PetscCall(MatDenseRestoreArrayRead(B, &barray)); 21812b691707SHong Zhang 2182cf053153SJunchao Zhang /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */ 2183801fbe65SHong Zhang if (!mumps->myid) { 2184beae5ec0SHong Zhang PetscInt *idx; 2185beae5ec0SHong Zhang /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 2186917c3dccSPierre Jolivet PetscCall(PetscMalloc1(nrhsM, &idx)); 21879566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(B, &rstart)); 2188917c3dccSPierre Jolivet for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) { 2189be818407SHong Zhang for (j = 0; j < nrhs; j++) { 2190beae5ec0SHong Zhang for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 2191be818407SHong Zhang } 2192be818407SHong Zhang } 2193be818407SHong Zhang 2194917c3dccSPierre Jolivet PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq)); 2195917c3dccSPierre Jolivet PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to)); 2196917c3dccSPierre Jolivet PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from)); 2197801fbe65SHong Zhang } else { 21989566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 21999566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 22009566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 
2201801fbe65SHong Zhang } 2202cf053153SJunchao Zhang 22039566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 22049566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 22059566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 22069566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 22079566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 2208801fbe65SHong Zhang 2209801fbe65SHong Zhang if (!mumps->myid) { /* define rhs on the host */ 2210cf053153SJunchao Zhang PetscCall(VecGetArrayRead(b_seq, &barray)); 2211cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 2212cf053153SJunchao Zhang PetscCall(VecRestoreArrayRead(b_seq, &barray)); 2213801fbe65SHong Zhang } 221425aac85cSJunchao Zhang } 22152b691707SHong Zhang } else { /* sparse B */ 22162b691707SHong Zhang b = (Mat_MPIAIJ *)Bt->data; 22172b691707SHong Zhang 2218be818407SHong Zhang /* wrap dense X into a vector v_mpi */ 22199566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(X, &m, NULL)); 2220cf053153SJunchao Zhang PetscCall(MatDenseGetArrayRead(X, &barray)); 2221cf053153SJunchao Zhang PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi)); 2222cf053153SJunchao Zhang PetscCall(MatDenseRestoreArrayRead(X, &barray)); 22232b691707SHong Zhang 22242b691707SHong Zhang if (!mumps->myid) { 22259566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(b->A, &aa)); 22269566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 222728b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 22289566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, 
spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2229cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 22302b691707SHong Zhang } else { 22312b691707SHong Zhang mumps->id.irhs_ptr = NULL; 22322b691707SHong Zhang mumps->id.irhs_sparse = NULL; 22332b691707SHong Zhang mumps->id.nz_rhs = 0; 2234cf053153SJunchao Zhang if (mumps->id.rhs_sparse_len) { 2235cf053153SJunchao Zhang PetscCall(PetscFree(mumps->id.rhs_sparse)); 2236cf053153SJunchao Zhang mumps->id.rhs_sparse_len = 0; 2237cf053153SJunchao Zhang } 22382b691707SHong Zhang } 22392b691707SHong Zhang } 22402b691707SHong Zhang 2241801fbe65SHong Zhang /* solve phase */ 2242801fbe65SHong Zhang mumps->id.job = JOB_SOLVE; 22433ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 22449261f6e4SBarry Smith PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2245801fbe65SHong Zhang 2246f0b74427SPierre Jolivet /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */ 22479566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &array)); 22489566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(v_mpi, array)); 2249801fbe65SHong Zhang 2250334c5f61SHong Zhang /* create scatter scat_sol */ 22519566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(X, &rstart)); 2252f0b74427SPierre Jolivet /* iidx: index for scatter mumps solution to PETSc X */ 2253beae5ec0SHong Zhang 22549566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 22559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 2256beae5ec0SHong Zhang for (i = 0; i < lsol_loc; i++) { 2257beae5ec0SHong Zhang isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 2258beae5ec0SHong Zhang 22592d4298aeSJunchao Zhang for (proc = 0; proc < mumps->petsc_size; proc++) { 2260beae5ec0SHong Zhang if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 2261beae5ec0SHong Zhang myrstart = rstart[proc]; 2262f0b74427SPierre Jolivet k = isol_loc[i] - myrstart; /* local index on 1st column of PETSc vector X */ 2263f0b74427SPierre Jolivet iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to PETSc index in X */ 2264beae5ec0SHong Zhang m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 2265beae5ec0SHong Zhang break; 2266be818407SHong Zhang } 2267be818407SHong Zhang } 2268be818407SHong Zhang 2269beae5ec0SHong Zhang for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 2270801fbe65SHong Zhang } 22719566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 2272cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[] 22739566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 22749566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 22759566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_from)); 22769566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_to)); 22779566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 22789566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &array)); 227971aed81dSHong Zhang 2280cf053153SJunchao Zhang if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers 2281cf053153SJunchao Zhang mumps->id.sol_loc_len = 0; 2282cf053153SJunchao Zhang PetscCall(PetscFree(mumps->id.sol_loc)); 2283cf053153SJunchao Zhang } 2284cf053153SJunchao Zhang 
2285cf053153SJunchao Zhang // restore old values 2286cf053153SJunchao Zhang mumps->id.sol_loc = sol_loc_save; 2287cf053153SJunchao Zhang mumps->id.sol_loc_len = sol_loc_len_save; 228871aed81dSHong Zhang mumps->id.isol_loc = isol_loc_save; 228971aed81dSHong Zhang 22909566063dSJacob Faibussowitsch PetscCall(PetscFree2(sol_loc, isol_loc)); 22919566063dSJacob Faibussowitsch PetscCall(PetscFree(idxx)); 22929566063dSJacob Faibussowitsch PetscCall(VecDestroy(&msol_loc)); 22939566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v_mpi)); 2294a6053eceSJunchao Zhang if (!denseB) { 22952b691707SHong Zhang if (!mumps->myid) { 2296d56c302dSHong Zhang b = (Mat_MPIAIJ *)Bt->data; 22979566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 22989566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 229928b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 23002b691707SHong Zhang } 23012b691707SHong Zhang } else { 230225aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { 23039566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b_seq)); 23049566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_rhs)); 230525aac85cSJunchao Zhang } 23062b691707SHong Zhang } 23079566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&scat_sol)); 230857508eceSPierre Jolivet PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 23093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2310e0b74bf9SHong Zhang } 2311e0b74bf9SHong Zhang 231266976f2fSJacob Faibussowitsch static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 2313d71ae5a4SJacob Faibussowitsch { 2314b18964edSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2315338d3105SPierre Jolivet const PetscMUMPSInt value = mumps->id.ICNTL(9); 2316b18964edSHong Zhang 2317b18964edSHong Zhang PetscFunctionBegin; 2318b18964edSHong Zhang mumps->id.ICNTL(9) = 0; 2319b18964edSHong Zhang PetscCall(MatMatSolve_MUMPS(A, B, X)); 2320338d3105SPierre Jolivet mumps->id.ICNTL(9) = value; 23213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2322b18964edSHong Zhang } 2323b18964edSHong Zhang 232466976f2fSJacob Faibussowitsch static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 2325d71ae5a4SJacob Faibussowitsch { 2326eb3ef3b2SHong Zhang PetscBool flg; 2327eb3ef3b2SHong Zhang Mat B; 2328eb3ef3b2SHong Zhang 2329eb3ef3b2SHong Zhang PetscFunctionBegin; 23309566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 233128b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 2332eb3ef3b2SHong Zhang 2333eb3ef3b2SHong Zhang /* Create B=Bt^T that uses Bt's data structure */ 23349566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(Bt, &B)); 2335eb3ef3b2SHong Zhang 23369566063dSJacob Faibussowitsch PetscCall(MatMatSolve_MUMPS(A, B, X)); 23379566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 23383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2339eb3ef3b2SHong Zhang } 2340eb3ef3b2SHong Zhang 2341ace3df97SHong Zhang #if !defined(PETSC_USE_COMPLEX) 2342a58c3f20SHong Zhang /* 2343a58c3f20SHong Zhang input: 2344a58c3f20SHong Zhang F: numeric factor 2345a58c3f20SHong Zhang 
output: 2346a58c3f20SHong Zhang nneg: total number of negative pivots 234719d49a3bSHong Zhang nzero: total number of zero pivots 234819d49a3bSHong Zhang npos: (global dimension of F) - nneg - nzero 2349a58c3f20SHong Zhang */ 235066976f2fSJacob Faibussowitsch static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 2351d71ae5a4SJacob Faibussowitsch { 2352e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2353c1490034SHong Zhang PetscMPIInt size; 2354a58c3f20SHong Zhang 2355a58c3f20SHong Zhang PetscFunctionBegin; 23569566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 2357bcb30aebSHong Zhang /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 2358aed4548fSBarry Smith PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 2359ed85ac9fSHong Zhang 2360710ac8efSHong Zhang if (nneg) *nneg = mumps->id.INFOG(12); 2361ed85ac9fSHong Zhang if (nzero || npos) { 236208401ef6SPierre Jolivet PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2363710ac8efSHong Zhang if (nzero) *nzero = mumps->id.INFOG(28); 2364710ac8efSHong Zhang if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 2365a58c3f20SHong Zhang } 23663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2367a58c3f20SHong Zhang } 236819d49a3bSHong Zhang #endif 2369a58c3f20SHong Zhang 237066976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 2371d71ae5a4SJacob Faibussowitsch { 23726497c311SBarry Smith PetscMPIInt nreqs; 2373a6053eceSJunchao Zhang PetscMUMPSInt *irn, *jcn; 2374a6053eceSJunchao Zhang PetscMPIInt count; 23756497c311SBarry Smith PetscCount totnnz, remain; 2376a6053eceSJunchao Zhang const PetscInt osize = mumps->omp_comm_size; 2377a6053eceSJunchao Zhang PetscScalar *val; 23783ab56b82SJunchao Zhang 23793ab56b82SJunchao Zhang PetscFunctionBegin; 2380a6053eceSJunchao Zhang if (osize > 1) { 23813ab56b82SJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) { 23823ab56b82SJunchao Zhang /* master first gathers counts of nonzeros to receive */ 23839566063dSJacob Faibussowitsch if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 23849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 23853ab56b82SJunchao Zhang 2386a6053eceSJunchao Zhang /* Then each computes number of send/recvs */ 23873ab56b82SJunchao Zhang if (mumps->is_omp_master) { 2388a6053eceSJunchao Zhang /* Start from 1 since self communication is not done in MPI */ 
/*
  MatMumpsGatherNonzerosOnMaster - Collects the matrix triples (irn, jcn, val) held by the ranks of
  mumps->omp_comm onto rank 0 of that communicator. Does nothing when omp_comm_size is 1.

  Input Parameters:
+ reuse - MAT_INITIAL_MATRIX gathers the counts, the indices and the values and (re)allocates the
          arrays on the master; any other value gathers only the values into the arrays already
          in place
- mumps - the MUMPS context; on the master its nnz, irn, jcn, val and val_alloc are replaced by the
          gathered arrays during the initial gather

  Notes:
  Messages are cut into pieces of at most PETSC_MPI_INT_MAX entries, so the number of requests is
  computed first (three per piece in the initial gather: irn, jcn and val). mumps->recvcount and
  mumps->reqs are allocated by the initial gather and used again by later calls.
*/
static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscMPIInt    nreqs;
  PetscMUMPSInt *irn, *jcn;
  PetscMPIInt    count;
  PetscCount     totnnz, remain;
  const PetscInt osize = mumps->omp_comm_size;
  PetscScalar   *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
      PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        /* ceil(recvcount[i] / PETSC_MPI_INT_MAX) pieces are needed for rank i */
        for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
      } else {
        nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
      }
      PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        totnnz = 0;

        for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
        PetscCall(PetscMalloc1(totnnz, &val));

        /* Self communication */
        PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
        PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
        PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        /* from here on irn/jcn/val are write cursors that advance past the data already in place */
        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication */
        for (PetscMPIInt i = 1; i < osize; i++) {
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            irn += count;
            jcn += count;
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        /* non-master ranks send their own triples to rank 0 in the same piece sizes */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          irn += count;
          jcn += count;
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      /* only the values are gathered; recvcount and reqs from the initial gather are used again */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    /* complete every send/recv posted above before the buffers are used */
    PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2493397b6df1SKris Buschelman 2494397b6df1SKris Buschelman PetscFunctionBegin; 2495dbf6bb8dSprj- if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 249648a46eb9SPierre Jolivet if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 24979566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 24983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24992aca8efcSHong Zhang } 25006baea169SHong Zhang 25019566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 25029566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2503397b6df1SKris Buschelman 2504397b6df1SKris Buschelman /* numerical factorization phase */ 2505a5e57a09SHong Zhang mumps->id.job = JOB_FACTNUMERIC; 25064e34a73bSHong Zhang if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2507cf053153SJunchao Zhang if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2508397b6df1SKris Buschelman } else { 2509cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2510397b6df1SKris Buschelman } 2511cf053153SJunchao Zhang 2512cf053153SJunchao Zhang if (F->schur) { 2513cf053153SJunchao Zhang const PetscScalar *array; 2514cf053153SJunchao Zhang MUMPS_INT size = mumps->id.size_schur; 2515cf053153SJunchao Zhang PetscCall(MatDenseGetArrayRead(F->schur, &array)); 2516cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur)); 2517cf053153SJunchao Zhang 
PetscCall(MatDenseRestoreArrayRead(F->schur, &array)); 2518cf053153SJunchao Zhang } 2519cf053153SJunchao Zhang 25203ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 2521a5e57a09SHong Zhang if (mumps->id.INFOG(1) < 0) { 25229261f6e4SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 25239261f6e4SBarry Smith if (mumps->id.INFOG(1) == -10) { 25249261f6e4SBarry Smith PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2525603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2526c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -13) { 25279261f6e4SBarry Smith PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2528603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2529c0d63f2fSHong Zhang } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2530bdcd51b8SPierre Jolivet PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2531603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 25322aca8efcSHong Zhang } else { 25339261f6e4SBarry Smith PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2534603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 2535151787a6SHong Zhang } 25362aca8efcSHong Zhang } 25379261f6e4SBarry Smith PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 
2538397b6df1SKris Buschelman 2539b3cb21ddSStefano Zampini F->assembled = PETSC_TRUE; 2540d47f36abSHong Zhang 2541b3cb21ddSStefano Zampini if (F->schur) { /* reset Schur status to unfactored */ 25423cb7dd0eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 2543c70f7ee4SJunchao Zhang F->schur->offloadmask = PETSC_OFFLOAD_CPU; 25443cb7dd0eSStefano Zampini #endif 2545cf053153SJunchao Zhang PetscScalar *array; 2546cf053153SJunchao Zhang PetscCall(MatDenseGetArray(F->schur, &array)); 2547cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array)); 2548cf053153SJunchao Zhang PetscCall(MatDenseRestoreArray(F->schur, &array)); 2549b3cb21ddSStefano Zampini if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2550b3cb21ddSStefano Zampini mumps->id.ICNTL(19) = 2; 25519566063dSJacob Faibussowitsch PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2552b3cb21ddSStefano Zampini } 25539566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2554b3cb21ddSStefano Zampini } 255567877ebaSShri Abhyankar 2556066565c5SStefano Zampini /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2557066565c5SStefano Zampini if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2558066565c5SStefano Zampini 25593ab56b82SJunchao Zhang if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2560cf053153SJunchao Zhang // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the 2561cf053153SJunchao Zhang // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for 2562cf053153SJunchao Zhang // which the solution (in SOL_loc) is available on the local processor. 
2563cf053153SJunchao Zhang // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will 2564cf053153SJunchao Zhang // normally have the same contents for each of these calls. The only exception is the case of 2565cf053153SJunchao Zhang // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non 2566cf053153SJunchao Zhang // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase. 25672d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 256867877ebaSShri Abhyankar PetscInt lsol_loc; 2569cf053153SJunchao Zhang PetscScalar *array; 2570c2093ab7SHong Zhang 2571c2093ab7SHong Zhang /* distributed solution; Create x_seq=sol_loc for repeated use */ 2572c2093ab7SHong Zhang if (mumps->x_seq) { 25739566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2574cf053153SJunchao Zhang PetscCall(PetscFree(mumps->id.isol_loc)); 25759566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mumps->x_seq)); 2576c2093ab7SHong Zhang } 2577a5e57a09SHong Zhang lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2578cf053153SJunchao Zhang PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc)); 2579cf053153SJunchao Zhang PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq)); 2580cf053153SJunchao Zhang PetscCall(VecGetArray(mumps->x_seq, &array)); 2581cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 2582cf053153SJunchao Zhang PetscCall(VecRestoreArray(mumps->x_seq, &array)); 25836497c311SBarry Smith mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc; 258467877ebaSShri Abhyankar } 2585cf053153SJunchao Zhang PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2))); 25863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2587397b6df1SKris Buschelman } 2588397b6df1SKris Buschelman 25899a2535b5SHong Zhang /* Sets MUMPS options from the options database 
*/ 259066976f2fSJacob Faibussowitsch static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2591d71ae5a4SJacob Faibussowitsch { 2592e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2593cf053153SJunchao Zhang PetscReal cntl; 2594413bcc21SPierre Jolivet PetscMUMPSInt icntl = 0, size, *listvar_schur; 259545e3843bSPierre Jolivet PetscInt info[80], i, ninfo = 80, rbs, cbs; 2596cf053153SJunchao Zhang PetscBool flg = PETSC_FALSE; 2597cf053153SJunchao Zhang PetscBool schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1); 2598cf053153SJunchao Zhang void *arr; 2599dcd589f8SShri Abhyankar 2600dcd589f8SShri Abhyankar PetscFunctionBegin; 260126cc229bSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2602413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2603cf053153SJunchao Zhang PetscPrecision precision = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE; 2604413bcc21SPierre Jolivet PetscInt nthreads = 0; 2605413bcc21SPierre Jolivet PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2606413bcc21SPierre Jolivet PetscInt nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 260793d70b8aSPierre Jolivet PetscMUMPSInt nblk, *blkvar, *blkptr; 2608413bcc21SPierre Jolivet 2609413bcc21SPierre Jolivet mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2610413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2611413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2612413bcc21SPierre Jolivet 2613413bcc21SPierre Jolivet PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2614413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2615413bcc21SPierre Jolivet /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2616413bcc21SPierre Jolivet PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2617413bcc21SPierre Jolivet if (mumps->use_petsc_omp_support) { 2618413bcc21SPierre Jolivet PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2619413bcc21SPierre Jolivet #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2620413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2621413bcc21SPierre Jolivet PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2622ea17275aSJose E. Roman #else 2623ea17275aSJose E. Roman SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. 
Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2624ea17275aSJose E. Roman ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2625413bcc21SPierre Jolivet #endif 2626413bcc21SPierre Jolivet } else { 2627413bcc21SPierre Jolivet mumps->omp_comm = PETSC_COMM_SELF; 2628413bcc21SPierre Jolivet mumps->mumps_comm = mumps->petsc_comm; 2629413bcc21SPierre Jolivet mumps->is_omp_master = PETSC_TRUE; 2630413bcc21SPierre Jolivet } 2631413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2632413bcc21SPierre Jolivet mumps->reqs = NULL; 2633413bcc21SPierre Jolivet mumps->tag = 0; 2634413bcc21SPierre Jolivet 2635413bcc21SPierre Jolivet if (mumps->mumps_comm != MPI_COMM_NULL) { 2636413bcc21SPierre Jolivet if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2637413bcc21SPierre Jolivet /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 2638413bcc21SPierre Jolivet MPI_Comm comm; 2639413bcc21SPierre Jolivet PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2640413bcc21SPierre Jolivet mumps->mumps_comm = comm; 2641413bcc21SPierre Jolivet } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2642413bcc21SPierre Jolivet } 2643413bcc21SPierre Jolivet 2644413bcc21SPierre Jolivet mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2645413bcc21SPierre Jolivet mumps->id.job = JOB_INIT; 2646413bcc21SPierre Jolivet mumps->id.par = 1; /* host participates factorizaton and solve */ 2647413bcc21SPierre Jolivet mumps->id.sym = mumps->sym; 2648413bcc21SPierre Jolivet 2649413bcc21SPierre Jolivet size = mumps->id.size_schur; 2650413bcc21SPierre Jolivet arr = mumps->id.schur; 2651413bcc21SPierre Jolivet listvar_schur = mumps->id.listvar_schur; 265293d70b8aSPierre Jolivet nblk = mumps->id.nblk; 265393d70b8aSPierre Jolivet blkvar = mumps->id.blkvar; 265493d70b8aSPierre Jolivet blkptr = mumps->id.blkptr; 265596eb7ee0SStefano Zampini if (PetscDefined(USE_DEBUG)) { 265696eb7ee0SStefano Zampini for (PetscInt i = 0; i < size; i++) 265796eb7ee0SStefano Zampini PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! 
%" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1, 265896eb7ee0SStefano Zampini A->rmap->N); 265996eb7ee0SStefano Zampini } 266096eb7ee0SStefano Zampini 2661cf053153SJunchao Zhang PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL)); 2662cf053153SJunchao Zhang PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]); 2663cf053153SJunchao Zhang PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar"); 2664cf053153SJunchao Zhang PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision)); 2665cf053153SJunchao Zhang 2666413bcc21SPierre Jolivet PetscMUMPS_c(mumps); 26679261f6e4SBarry Smith PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 266851ad14ebSPierre Jolivet 266951ad14ebSPierre Jolivet /* set PETSc-MUMPS default options - override MUMPS default */ 267051ad14ebSPierre Jolivet mumps->id.ICNTL(3) = 0; 267151ad14ebSPierre Jolivet mumps->id.ICNTL(4) = 0; 267251ad14ebSPierre Jolivet if (mumps->petsc_size == 1) { 267351ad14ebSPierre Jolivet mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 267451ad14ebSPierre Jolivet mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 267551ad14ebSPierre Jolivet } else { 267651ad14ebSPierre Jolivet mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 267751ad14ebSPierre Jolivet mumps->id.ICNTL(21) = 1; /* distributed solution */ 267851ad14ebSPierre Jolivet } 267993d70b8aSPierre Jolivet if (nblk && blkptr) { 
268093d70b8aSPierre Jolivet mumps->id.ICNTL(15) = 1; 268193d70b8aSPierre Jolivet mumps->id.nblk = nblk; 268293d70b8aSPierre Jolivet mumps->id.blkvar = blkvar; 268393d70b8aSPierre Jolivet mumps->id.blkptr = blkptr; 2684cf053153SJunchao Zhang } else mumps->id.ICNTL(15) = 0; 268551ad14ebSPierre Jolivet 2686413bcc21SPierre Jolivet /* restore cached ICNTL and CNTL values */ 2687413bcc21SPierre Jolivet for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2688cf053153SJunchao Zhang for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]); 2689cf053153SJunchao Zhang 2690413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->ICNTL_pre)); 2691413bcc21SPierre Jolivet PetscCall(PetscFree(mumps->CNTL_pre)); 2692413bcc21SPierre Jolivet 2693413bcc21SPierre Jolivet if (schur) { 2694413bcc21SPierre Jolivet mumps->id.size_schur = size; 2695413bcc21SPierre Jolivet mumps->id.schur_lld = size; 2696413bcc21SPierre Jolivet mumps->id.schur = arr; 2697413bcc21SPierre Jolivet mumps->id.listvar_schur = listvar_schur; 2698413bcc21SPierre Jolivet if (mumps->petsc_size > 1) { 2699413bcc21SPierre Jolivet PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2700413bcc21SPierre Jolivet 2701413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2702413bcc21SPierre Jolivet gs = mumps->myid ? (mumps->id.size_schur ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 27035440e5dcSBarry Smith PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm)); 2704413bcc21SPierre Jolivet PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2705413bcc21SPierre Jolivet } else { 2706413bcc21SPierre Jolivet if (F->factortype == MAT_FACTOR_LU) { 2707413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2708413bcc21SPierre Jolivet } else { 2709413bcc21SPierre Jolivet mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2710413bcc21SPierre Jolivet } 2711413bcc21SPierre Jolivet } 2712413bcc21SPierre Jolivet mumps->id.ICNTL(26) = -1; 2713413bcc21SPierre Jolivet } 2714413bcc21SPierre Jolivet 2715413bcc21SPierre Jolivet /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2716413bcc21SPierre Jolivet For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
2717413bcc21SPierre Jolivet */ 2718413bcc21SPierre Jolivet PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2719cf053153SJunchao Zhang PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); 2720413bcc21SPierre Jolivet 2721413bcc21SPierre Jolivet mumps->scat_rhs = NULL; 2722413bcc21SPierre Jolivet mumps->scat_sol = NULL; 2723413bcc21SPierre Jolivet } 27249566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 27259a2535b5SHong Zhang if (flg) mumps->id.ICNTL(1) = icntl; 27269566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 27279a2535b5SHong Zhang if (flg) mumps->id.ICNTL(2) = icntl; 27289566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 27299a2535b5SHong Zhang if (flg) mumps->id.ICNTL(3) = icntl; 2730dcd589f8SShri Abhyankar 27319566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 27329a2535b5SHong Zhang if (flg) mumps->id.ICNTL(4) = icntl; 27339a2535b5SHong Zhang if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 27349a2535b5SHong Zhang 27359566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 27369a2535b5SHong Zhang if (flg) mumps->id.ICNTL(6) = icntl; 27379a2535b5SHong Zhang 27389566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): 
computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2739dcd589f8SShri Abhyankar if (flg) { 2740aed4548fSBarry Smith PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2741b53c1a7fSBarry Smith mumps->id.ICNTL(7) = icntl; 2742dcd589f8SShri Abhyankar } 2743e0b74bf9SHong Zhang 27449566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 27459566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 27469566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 27479566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 27489566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 27499566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 27509566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 
275145e3843bSPierre Jolivet PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 27526497c311SBarry Smith if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs; 275345e3843bSPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 275445e3843bSPierre Jolivet if (flg) { 275593d70b8aSPierre Jolivet if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 275693d70b8aSPierre Jolivet else if (mumps->id.ICNTL(15) > 0) { 275793d70b8aSPierre Jolivet const PetscInt *bsizes; 275893d70b8aSPierre Jolivet PetscInt nblocks, p, *blkptr = NULL; 275993d70b8aSPierre Jolivet PetscMPIInt *recvcounts, *displs, n; 276093d70b8aSPierre Jolivet PetscMPIInt rank, size = 0; 276193d70b8aSPierre Jolivet 276293d70b8aSPierre Jolivet PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes)); 276393d70b8aSPierre Jolivet flg = PETSC_TRUE; 276493d70b8aSPierre Jolivet for (p = 0; p < nblocks; ++p) { 276593d70b8aSPierre Jolivet if (bsizes[p] > 1) break; 276693d70b8aSPierre Jolivet } 276793d70b8aSPierre Jolivet if (p == nblocks) flg = PETSC_FALSE; 27685440e5dcSBarry Smith PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 276993d70b8aSPierre Jolivet if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1 277093d70b8aSPierre Jolivet PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 277193d70b8aSPierre Jolivet if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 277293d70b8aSPierre Jolivet PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs)); 277393d70b8aSPierre Jolivet PetscCall(PetscMPIIntCast(nblocks, &n)); 
277493d70b8aSPierre Jolivet PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); 277593d70b8aSPierre Jolivet for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p]; 277693d70b8aSPierre Jolivet PetscCall(PetscMalloc1(displs[size] + 1, &blkptr)); 277793d70b8aSPierre Jolivet PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 277893d70b8aSPierre Jolivet PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 277993d70b8aSPierre Jolivet if (rank == 0) { 278093d70b8aSPierre Jolivet blkptr[0] = 1; 278193d70b8aSPierre Jolivet for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p]; 278293d70b8aSPierre Jolivet PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr)); 278393d70b8aSPierre Jolivet } 278493d70b8aSPierre Jolivet PetscCall(PetscFree2(recvcounts, displs)); 278593d70b8aSPierre Jolivet PetscCall(PetscFree(blkptr)); 278693d70b8aSPierre Jolivet } 278793d70b8aSPierre Jolivet } 278845e3843bSPierre Jolivet } 27899566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 279059ac8732SStefano Zampini if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 27919566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F->schur)); 27929566063dSJacob Faibussowitsch PetscCall(MatMumpsResetSchur_Private(mumps)); 279359ac8732SStefano Zampini } 279425aac85cSJunchao Zhang 279543f3b051SJunchao Zhang /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 279643f3b051SJunchao Zhang and was reported by Firedrake. 
See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 279725aac85cSJunchao Zhang and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 279843f3b051SJunchao Zhang This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 279943f3b051SJunchao Zhang see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 280041caa250SJunchao Zhang In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 280125aac85cSJunchao Zhang */ 2802c183326eSPierre Jolivet mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */ 2803c183326eSPierre Jolivet #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI) 2804c183326eSPierre Jolivet mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */ 280525aac85cSJunchao Zhang #endif 28069566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2807aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10", (int)mumps->ICNTL20); 280825aac85cSJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2809aed4548fSBarry Smith PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 281025aac85cSJunchao Zhang #endif 28119566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 28129a2535b5SHong Zhang 28139566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 28149566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 28159566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2816ac530a7eSPierre Jolivet if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 2817d7ebd59bSHong Zhang 28189566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 28199566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 28209566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple 
right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2821fa6fd9d0SPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 28229566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 28239566063dSJacob Faibussowitsch /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 28249566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2825145b44c9SPierre Jolivet /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 28269566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 28279566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 28289566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 282950ea2040Saszaboa PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks 
(CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL)); 28309566063dSJacob Faibussowitsch PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2831c92b4f89SPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL)); 283291b026caSPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL)); 2833146931dbSPierre Jolivet PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2834dcd589f8SShri Abhyankar 2835cf053153SJunchao Zhang PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg)); 2836cf053153SJunchao Zhang if (flg) ID_CNTL_SET(mumps->id, 1, cntl); 2837cf053153SJunchao Zhang PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg)); 2838cf053153SJunchao Zhang if (flg) ID_CNTL_SET(mumps->id, 2, cntl); 2839cf053153SJunchao Zhang PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg)); 2840cf053153SJunchao Zhang if (flg) ID_CNTL_SET(mumps->id, 3, cntl); 2841cf053153SJunchao Zhang PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg)); 2842cf053153SJunchao Zhang if (flg) ID_CNTL_SET(mumps->id, 4, cntl); 2843cf053153SJunchao Zhang PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", 
(PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg)); 2844cf053153SJunchao Zhang if (flg) ID_CNTL_SET(mumps->id, 5, cntl); 2845cf053153SJunchao Zhang PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg)); 2846cf053153SJunchao Zhang if (flg) ID_CNTL_SET(mumps->id, 7, cntl); 2847e5bb22a1SHong Zhang 28489566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2849b34f08ffSHong Zhang 28509566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2851b34f08ffSHong Zhang if (ninfo) { 285208401ef6SPierre Jolivet PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 28539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2854b34f08ffSHong Zhang mumps->ninfo = ninfo; 2855b34f08ffSHong Zhang for (i = 0; i < ninfo; i++) { 2856aed4548fSBarry Smith PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2857f7d195e4SLawrence Mitchell mumps->info[i] = info[i]; 2858b34f08ffSHong Zhang } 2859b34f08ffSHong Zhang } 2860d0609cedSBarry Smith PetscOptionsEnd(); 28613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2862dcd589f8SShri Abhyankar } 2863dcd589f8SShri Abhyankar 2864d2a308c1SPierre Jolivet static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2865d71ae5a4SJacob Faibussowitsch { 28665cd7cf9dSHong Zhang PetscFunctionBegin; 28675cd7cf9dSHong Zhang if (mumps->id.INFOG(1) < 0) { 28689261f6e4SBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: 
INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 28695cd7cf9dSHong Zhang if (mumps->id.INFOG(1) == -6) { 28709261f6e4SBarry Smith PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2871603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 28725cd7cf9dSHong Zhang } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 28739261f6e4SBarry Smith PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2874603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 28755cd7cf9dSHong Zhang } else { 28769261f6e4SBarry Smith PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2877603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OTHER; 28785cd7cf9dSHong Zhang } 28795cd7cf9dSHong Zhang } 288072b150d8SStefano Zampini if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 28813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28825cd7cf9dSHong Zhang } 28835cd7cf9dSHong Zhang 2884d2a308c1SPierre Jolivet static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2885d71ae5a4SJacob Faibussowitsch { 2886e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 288767877ebaSShri Abhyankar Vec b; 288867877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2889397b6df1SKris Buschelman 2890397b6df1SKris Buschelman PetscFunctionBegin; 2891d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2892d47f36abSHong Zhang /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 28933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2894d47f36abSHong Zhang } 2895dcd589f8SShri Abhyankar 28969a2535b5SHong Zhang /* Set MUMPS options from the options database */ 289726cc229bSBarry Smith 
PetscCall(MatSetFromOptions_MUMPS(F, A)); 2898dcd589f8SShri Abhyankar 28999566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 29009566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2901dcd589f8SShri Abhyankar 290267877ebaSShri Abhyankar /* analysis phase */ 2903a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 29046497c311SBarry Smith PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2905a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 290667877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2907a5e57a09SHong Zhang if (!mumps->myid) { 2908a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2909a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2910a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2911cf053153SJunchao Zhang if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 291251ad14ebSPierre Jolivet if (r && mumps->id.ICNTL(7) == 7) { 29134ac6704cSBarry Smith mumps->id.ICNTL(7) = 1; 2914a5e57a09SHong Zhang if (!mumps->myid) { 2915e0b74bf9SHong Zhang const PetscInt *idx; 2916a6053eceSJunchao Zhang PetscInt i; 29172205254eSKarl Rupp 29189566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 29199566063dSJacob Faibussowitsch PetscCall(ISGetIndices(r, &idx)); 2920f4f49eeaSPierre Jolivet for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! 
*/ 29219566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(r, &idx)); 2922e0b74bf9SHong Zhang } 2923e0b74bf9SHong Zhang } 292467877ebaSShri Abhyankar } 292567877ebaSShri Abhyankar break; 292667877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2927a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2928a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2929a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2930cf053153SJunchao Zhang if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 293125aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 29329566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 29339566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 29349566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 293525aac85cSJunchao Zhang } 293667877ebaSShri Abhyankar break; 293767877ebaSShri Abhyankar } 29383ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 29399566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 294067877ebaSShri Abhyankar 2941719d5645SBarry Smith F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2942dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 294351d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 29444e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 2945eb3ef3b2SHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2946b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2947d47f36abSHong Zhang 2948d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 29493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2950b24902e0SBarry Smith } 2951b24902e0SBarry Smith 2952f0b74427SPierre Jolivet /* Note the 
PETSc r and c permutations are ignored */ 2953d2a308c1SPierre Jolivet static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2954d71ae5a4SJacob Faibussowitsch { 2955e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 295667877ebaSShri Abhyankar Vec b; 295767877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 2958450b117fSShri Abhyankar 2959450b117fSShri Abhyankar PetscFunctionBegin; 2960d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2961338d3105SPierre Jolivet /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 29623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2963d47f36abSHong Zhang } 2964dcd589f8SShri Abhyankar 29659a2535b5SHong Zhang /* Set MUMPS options from the options database */ 296626cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 2967dcd589f8SShri Abhyankar 29689566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 29699566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 297067877ebaSShri Abhyankar 297167877ebaSShri Abhyankar /* analysis phase */ 2972a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 29736497c311SBarry Smith PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2974a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 297567877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 2976a5e57a09SHong Zhang if (!mumps->myid) { 2977a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 2978a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 2979a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 2980cf053153SJunchao Zhang if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 298167877ebaSShri Abhyankar } 298267877ebaSShri Abhyankar break; 298367877ebaSShri 
Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 2984a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 2985a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 2986a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 2987cf053153SJunchao Zhang if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 298825aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 29899566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 29909566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 29919566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 299225aac85cSJunchao Zhang } 299367877ebaSShri Abhyankar break; 299467877ebaSShri Abhyankar } 29953ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 29969566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 299767877ebaSShri Abhyankar 2998450b117fSShri Abhyankar F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2999dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 300051d5961aSHong Zhang F->ops->solvetranspose = MatSolveTranspose_MUMPS; 3001b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 3002d47f36abSHong Zhang 3003d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 30043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3005450b117fSShri Abhyankar } 3006b24902e0SBarry Smith 3007f0b74427SPierre Jolivet /* Note the PETSc r permutation and factor info are ignored */ 3008d2a308c1SPierre Jolivet static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info) 3009d71ae5a4SJacob Faibussowitsch { 3010e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 301167877ebaSShri Abhyankar Vec b; 
301267877ebaSShri Abhyankar const PetscInt M = A->rmap->N; 3013397b6df1SKris Buschelman 3014397b6df1SKris Buschelman PetscFunctionBegin; 3015d47f36abSHong Zhang if (mumps->matstruc == SAME_NONZERO_PATTERN) { 3016338d3105SPierre Jolivet /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 30173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3018d47f36abSHong Zhang } 3019dcd589f8SShri Abhyankar 30209a2535b5SHong Zhang /* Set MUMPS options from the options database */ 302126cc229bSBarry Smith PetscCall(MatSetFromOptions_MUMPS(F, A)); 3022dcd589f8SShri Abhyankar 30239566063dSJacob Faibussowitsch PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 30249566063dSJacob Faibussowitsch PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 3025dcd589f8SShri Abhyankar 302667877ebaSShri Abhyankar /* analysis phase */ 3027a5e57a09SHong Zhang mumps->id.job = JOB_FACTSYMBOLIC; 30286497c311SBarry Smith PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 3029a5e57a09SHong Zhang switch (mumps->id.ICNTL(18)) { 303067877ebaSShri Abhyankar case 0: /* centralized assembled matrix input */ 3031a5e57a09SHong Zhang if (!mumps->myid) { 3032a6053eceSJunchao Zhang mumps->id.nnz = mumps->nnz; 3033a6053eceSJunchao Zhang mumps->id.irn = mumps->irn; 3034a6053eceSJunchao Zhang mumps->id.jcn = mumps->jcn; 3035cf053153SJunchao Zhang if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 303667877ebaSShri Abhyankar } 303767877ebaSShri Abhyankar break; 303867877ebaSShri Abhyankar case 3: /* distributed assembled matrix input (size>1) */ 3039a6053eceSJunchao Zhang mumps->id.nnz_loc = mumps->nnz; 3040a6053eceSJunchao Zhang mumps->id.irn_loc = mumps->irn; 3041a6053eceSJunchao Zhang mumps->id.jcn_loc = mumps->jcn; 3042cf053153SJunchao Zhang if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) 
PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 304325aac85cSJunchao Zhang if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 30449566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &b)); 30459566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 30469566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b)); 304725aac85cSJunchao Zhang } 304867877ebaSShri Abhyankar break; 304967877ebaSShri Abhyankar } 30503ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 30519566063dSJacob Faibussowitsch PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 30525cd7cf9dSHong Zhang 30532792810eSHong Zhang F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 3054dcd589f8SShri Abhyankar F->ops->solve = MatSolve_MUMPS; 305551d5961aSHong Zhang F->ops->solvetranspose = MatSolve_MUMPS; 30564e34a73bSHong Zhang F->ops->matsolve = MatMatSolve_MUMPS; 305723a5080aSHong Zhang F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 3058b18964edSHong Zhang F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 30594e34a73bSHong Zhang #if defined(PETSC_USE_COMPLEX) 30600298fd71SBarry Smith F->ops->getinertia = NULL; 30614e34a73bSHong Zhang #else 30624e34a73bSHong Zhang F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 3063db4efbfdSBarry Smith #endif 3064d47f36abSHong Zhang 3065d47f36abSHong Zhang mumps->matstruc = SAME_NONZERO_PATTERN; 30663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3067b24902e0SBarry Smith } 3068b24902e0SBarry Smith 306966976f2fSJacob Faibussowitsch static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 3070d71ae5a4SJacob Faibussowitsch { 30719f196a02SMartin Diehl PetscBool isascii; 307264e6c443SBarry Smith PetscViewerFormat format; 3073e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 3074f6c57405SHong Zhang 3075f6c57405SHong Zhang 
PetscFunctionBegin; 307664e6c443SBarry Smith /* check if matrix is mumps type */ 30773ba16761SJacob Faibussowitsch if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 307864e6c443SBarry Smith 30799f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 30809f196a02SMartin Diehl if (isascii) { 30819566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 30821511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 30839566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 30841511cd71SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 30859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 30869566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 30879566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 30889566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 30899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 30909566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 30919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 30929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 30939566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 30949566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 30959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 30969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 3097a5e57a09SHong Zhang if (mumps->id.ICNTL(11) > 0) { 3098cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)ID_RINFOG_GET(mumps->id, 4))); 3099cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)ID_RINFOG_GET(mumps->id, 5))); 3100cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)ID_RINFOG_GET(mumps->id, 6))); 3101cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8))); 3102cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", (double)ID_RINFOG_GET(mumps->id, 9))); 3103cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11))); 3104f6c57405SHong Zhang } 31059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 31069566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 31079566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 310845e3843bSPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) 
(compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 3109f6c57405SHong Zhang /* ICNTL(15-17) not used */ 31109566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 31119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 31129566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 31139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 31149566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 31159566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 3116c0165424SHong Zhang 31179566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 31189566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 31199566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 31209566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 31219566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 31229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 312342179a6aSHong Zhang 31249566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 31259566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 31269566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 31279566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 31289566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 312950ea2040Saszaboa PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37))); 31309566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 3131c92b4f89SPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48))); 313291b026caSPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56))); 3133146931dbSPierre Jolivet PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 3134f6c57405SHong Zhang 3135cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 1))); 3136cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2))); 3137cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 3))); 
3138cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)ID_CNTL_GET(mumps->id, 4))); 3139cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)ID_CNTL_GET(mumps->id, 5))); 3140cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)ID_CNTL_GET(mumps->id, 7))); 3141f6c57405SHong Zhang 3142a5b23f4aSJose E. Roman /* information local to each processor */ 31439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 31449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 3145cf053153SJunchao Zhang PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1))); 31469566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 31479566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 3148cf053153SJunchao Zhang PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2))); 31499566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 31509566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 3151cf053153SJunchao Zhang PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3))); 31529566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 3153f6c57405SHong Zhang 31549566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 31559566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 31569566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 3157f6c57405SHong Zhang 31589566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 31599566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 31609566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 3161f6c57405SHong Zhang 31629566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 31639566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 31649566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 3165b34f08ffSHong Zhang 3166a0e18203SThibaut Appel if (mumps->ninfo && mumps->ninfo <= 80) { 3167b34f08ffSHong Zhang PetscInt i; 3168b34f08ffSHong Zhang for (i = 0; i < mumps->ninfo; i++) { 31699566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 31709566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 31719566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 3172b34f08ffSHong Zhang } 3173b34f08ffSHong Zhang } 31749566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 31751511cd71SPierre Jolivet } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 3176f6c57405SHong Zhang 31771511cd71SPierre Jolivet if (mumps->myid == 0) { /* information from the host */ 3178cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1))); 3179cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2))); 3180cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3))); 3181cf053153SJunchao Zhang PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34))); 3182f6c57405SHong Zhang 31839566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 31849566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 31859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 31869566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 31879566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 31889566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): 
%d\n", mumps->id.INFOG(8))); 31899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 31909566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 31919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 31929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 31939566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 31949566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 31959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 31969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 31979566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 31989566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 31999566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 32009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 32019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 32029566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 32039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 32049566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 32059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 32069566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 32079566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 32089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 32099566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after 
analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 32109566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 32119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 32129566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 32139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 32149566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 32159566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 32169566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 3217f6c57405SHong Zhang } 3218f6c57405SHong Zhang } 3219cb828f0fSHong Zhang } 32203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3221f6c57405SHong Zhang } 3222f6c57405SHong Zhang 3223d2a308c1SPierre Jolivet static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info) 3224d71ae5a4SJacob Faibussowitsch { 3225e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 
322635bd34faSBarry Smith 322735bd34faSBarry Smith PetscFunctionBegin; 322835bd34faSBarry Smith info->block_size = 1.0; 322964412097SPierre Jolivet info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 323064412097SPierre Jolivet info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 323135bd34faSBarry Smith info->nz_unneeded = 0.0; 323235bd34faSBarry Smith info->assemblies = 0.0; 323335bd34faSBarry Smith info->mallocs = 0.0; 323435bd34faSBarry Smith info->memory = 0.0; 323535bd34faSBarry Smith info->fill_ratio_given = 0; 323635bd34faSBarry Smith info->fill_ratio_needed = 0; 323735bd34faSBarry Smith info->factor_mallocs = 0; 32383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 323935bd34faSBarry Smith } 324035bd34faSBarry Smith 324166976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 3242d71ae5a4SJacob Faibussowitsch { 3243e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3244a3d589ffSStefano Zampini const PetscScalar *arr; 32458e7ba810SStefano Zampini const PetscInt *idxs; 32468e7ba810SStefano Zampini PetscInt size, i; 32476444a565SStefano Zampini 32486444a565SStefano Zampini PetscFunctionBegin; 32499566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is, &size)); 3250b3cb21ddSStefano Zampini /* Schur complement matrix */ 32519566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F->schur)); 32529566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 32539566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 3254cf053153SJunchao Zhang // don't allocate mumps->id.schur[] now as its precision is yet to know 32556497c311SBarry Smith PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur)); 32566497c311SBarry Smith PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld)); 32579566063dSJacob Faibussowitsch 
PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 325848a46eb9SPierre Jolivet if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 3259b3cb21ddSStefano Zampini 3260b3cb21ddSStefano Zampini /* MUMPS expects Fortran style indices */ 32619566063dSJacob Faibussowitsch PetscCall(PetscFree(mumps->id.listvar_schur)); 32629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 32639566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is, &idxs)); 3264f4f49eeaSPierre Jolivet for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i])); 32659566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is, &idxs)); 326659ac8732SStefano Zampini /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 3267cf053153SJunchao Zhang if (mumps->id.icntl) mumps->id.ICNTL(26) = -1; 3268cf053153SJunchao Zhang else mumps->ICNTL26 = -1; 32693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 32706444a565SStefano Zampini } 327159ac8732SStefano Zampini 327266976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 3273d71ae5a4SJacob Faibussowitsch { 32746444a565SStefano Zampini Mat St; 3275e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 32766444a565SStefano Zampini PetscScalar *array; 3277cf053153SJunchao Zhang PetscInt i, j, N = mumps->id.size_schur; 32786444a565SStefano Zampini 32796444a565SStefano Zampini PetscFunctionBegin; 32809261f6e4SBarry Smith PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! 
Call MatFactorSetSchurIS() to enable it"); 32819566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 32829566063dSJacob Faibussowitsch PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 32839566063dSJacob Faibussowitsch PetscCall(MatSetType(St, MATDENSE)); 32849566063dSJacob Faibussowitsch PetscCall(MatSetUp(St)); 32859566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(St, &array)); 328659ac8732SStefano Zampini if (!mumps->sym) { /* MUMPS always return a full matrix */ 32876444a565SStefano Zampini if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 32886444a565SStefano Zampini for (i = 0; i < N; i++) { 3289cf053153SJunchao Zhang for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 32906444a565SStefano Zampini } 32916444a565SStefano Zampini } else { /* stored by columns */ 3292cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array)); 32936444a565SStefano Zampini } 32946444a565SStefano Zampini } else { /* either full or lower-triangular (not packed) */ 32956444a565SStefano Zampini if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 32966444a565SStefano Zampini for (i = 0; i < N; i++) { 3297cf053153SJunchao Zhang for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 32986444a565SStefano Zampini } 32996444a565SStefano Zampini } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 3300cf053153SJunchao Zhang PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array)); 33016444a565SStefano Zampini } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 33026444a565SStefano Zampini for (i = 0; i < N; i++) { 3303cf053153SJunchao Zhang for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 33046444a565SStefano Zampini } 33056444a565SStefano Zampini 
} 33066444a565SStefano Zampini } 33079566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(St, &array)); 33086444a565SStefano Zampini *S = St; 33093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 33106444a565SStefano Zampini } 33116444a565SStefano Zampini 331266976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 3313d71ae5a4SJacob Faibussowitsch { 3314e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 33155ccb76cbSHong Zhang 33165ccb76cbSHong Zhang PetscFunctionBegin; 3317413bcc21SPierre Jolivet if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 33186497c311SBarry Smith PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 33199371c9d4SSatish Balay for (i = 0; i < nICNTL_pre; ++i) 33209371c9d4SSatish Balay if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? 
*/ 3321413bcc21SPierre Jolivet if (i == nICNTL_pre) { /* not already cached */ 3322413bcc21SPierre Jolivet if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 3323413bcc21SPierre Jolivet else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 3324413bcc21SPierre Jolivet mumps->ICNTL_pre[0]++; 3325413bcc21SPierre Jolivet } 33266497c311SBarry Smith mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl; 3327413bcc21SPierre Jolivet PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 3328413bcc21SPierre Jolivet } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 33293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 33305ccb76cbSHong Zhang } 33315ccb76cbSHong Zhang 333266976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 3333d71ae5a4SJacob Faibussowitsch { 3334e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3335bc6112feSHong Zhang 3336bc6112feSHong Zhang PetscFunctionBegin; 333736df9881Sjeremy theler if (mumps->id.job == JOB_NULL) { 333836df9881Sjeremy theler PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 333936df9881Sjeremy theler *ival = 0; 334036df9881Sjeremy theler for (i = 0; i < nICNTL_pre; ++i) { 334136df9881Sjeremy theler if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 334236df9881Sjeremy theler } 334336df9881Sjeremy theler } else *ival = mumps->id.ICNTL(icntl); 33443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3345bc6112feSHong Zhang } 3346bc6112feSHong Zhang 33475ccb76cbSHong Zhang /*@ 33481d27aa22SBarry Smith MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 33495ccb76cbSHong Zhang 3350c3339decSBarry Smith Logically Collective 33515ccb76cbSHong Zhang 33525ccb76cbSHong Zhang Input Parameters: 33530b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 335479578405SBarry Smith . icntl - index of MUMPS parameter array `ICNTL()` 335579578405SBarry Smith - ival - value of MUMPS `ICNTL(icntl)` 33565ccb76cbSHong Zhang 33573c7db156SBarry Smith Options Database Key: 335879578405SBarry Smith . 
-mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`

  Level: beginner

  Note:
  Ignored if MUMPS is not installed or `F` is not a MUMPS matrix

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveInt(F, ival, 3);
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array `ICNTL()`

  Output Parameter:
. ival - value of MUMPS `ICNTL(icntl)`

  Level: beginner

  Note:
  The accepted indices are those accepted by `MatMumpsSetIcntl()`: 1 through 38, 48, 56 and 58; any other index raises `PETSC_ERR_ARG_WRONG`

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(ival, 3);
  /* keep this range identical to the one in MatMumpsSetIcntl() so that every value that can be set can also be queried */
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ?
mumps->CNTL_pre[0] : 0; 34149371c9d4SSatish Balay for (i = 0; i < nCNTL_pre; ++i) 34159371c9d4SSatish Balay if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 3416413bcc21SPierre Jolivet if (i == nCNTL_pre) { 3417413bcc21SPierre Jolivet if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 3418413bcc21SPierre Jolivet else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 3419413bcc21SPierre Jolivet mumps->CNTL_pre[0]++; 3420413bcc21SPierre Jolivet } 3421413bcc21SPierre Jolivet mumps->CNTL_pre[1 + 2 * i] = icntl; 3422413bcc21SPierre Jolivet mumps->CNTL_pre[2 + 2 * i] = val; 3423cf053153SJunchao Zhang } else ID_CNTL_SET(mumps->id, icntl, val); 34243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34258928b65cSHong Zhang } 34268928b65cSHong Zhang 342766976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 3428d71ae5a4SJacob Faibussowitsch { 3429e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3430bc6112feSHong Zhang 3431bc6112feSHong Zhang PetscFunctionBegin; 343236df9881Sjeremy theler if (mumps->id.job == JOB_NULL) { 343336df9881Sjeremy theler PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 343436df9881Sjeremy theler *val = 0.0; 343536df9881Sjeremy theler for (i = 0; i < nCNTL_pre; ++i) { 343636df9881Sjeremy theler if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 343736df9881Sjeremy theler } 3438cf053153SJunchao Zhang } else *val = ID_CNTL_GET(mumps->id, icntl); 34393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3440bc6112feSHong Zhang } 3441bc6112feSHong Zhang 34428928b65cSHong Zhang /*@ 34431d27aa22SBarry Smith MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 34448928b65cSHong Zhang 3445c3339decSBarry Smith Logically Collective 34468928b65cSHong Zhang 34478928b65cSHong Zhang Input Parameters: 34480b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 344979578405SBarry Smith . icntl - index of MUMPS parameter array `CNTL()` 345079578405SBarry Smith - val - value of MUMPS `CNTL(icntl)` 34518928b65cSHong Zhang 34523c7db156SBarry Smith Options Database Key: 3453147403d9SBarry Smith . 
-mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 34548928b65cSHong Zhang 34558928b65cSHong Zhang Level: beginner 34568928b65cSHong Zhang 345779578405SBarry Smith Note: 345879578405SBarry Smith Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 345979578405SBarry Smith 34601cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 34618928b65cSHong Zhang @*/ 3462d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 3463d71ae5a4SJacob Faibussowitsch { 34648928b65cSHong Zhang PetscFunctionBegin; 34652989dfd4SHong Zhang PetscValidType(F, 1); 346628b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 34678928b65cSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 3468bc6112feSHong Zhang PetscValidLogicalCollectiveReal(F, val, 3); 3469413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3470cac4c232SBarry Smith PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 34713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34728928b65cSHong Zhang } 34738928b65cSHong Zhang 3474a21f80fcSHong Zhang /*@ 34751d27aa22SBarry Smith MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3476a21f80fcSHong Zhang 3477c3339decSBarry Smith Logically Collective 3478a21f80fcSHong Zhang 3479a21f80fcSHong Zhang Input Parameters: 34800b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3481a21f80fcSHong Zhang - icntl - index of MUMPS 
parameter array CNTL() 3482a21f80fcSHong Zhang 3483a21f80fcSHong Zhang Output Parameter: 3484a21f80fcSHong Zhang . val - value of MUMPS CNTL(icntl) 3485a21f80fcSHong Zhang 3486a21f80fcSHong Zhang Level: beginner 3487a21f80fcSHong Zhang 34881cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3489a21f80fcSHong Zhang @*/ 3490d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 3491d71ae5a4SJacob Faibussowitsch { 3492bc6112feSHong Zhang PetscFunctionBegin; 34932989dfd4SHong Zhang PetscValidType(F, 1); 349428b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3495bc6112feSHong Zhang PetscValidLogicalCollectiveInt(F, icntl, 2); 34964f572ea9SToby Isaac PetscAssertPointer(val, 3); 3497413bcc21SPierre Jolivet PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3498cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 34993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3500bc6112feSHong Zhang } 3501bc6112feSHong Zhang 350266976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 3503d71ae5a4SJacob Faibussowitsch { 3504e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3505bc6112feSHong Zhang 3506bc6112feSHong Zhang PetscFunctionBegin; 3507bc6112feSHong Zhang *info = mumps->id.INFO(icntl); 35083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3509bc6112feSHong Zhang } 3510bc6112feSHong Zhang 351166976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 3512d71ae5a4SJacob 
Faibussowitsch { 3513e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3514bc6112feSHong Zhang 3515bc6112feSHong Zhang PetscFunctionBegin; 3516bc6112feSHong Zhang *infog = mumps->id.INFOG(icntl); 35173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3518bc6112feSHong Zhang } 3519bc6112feSHong Zhang 352066976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 3521d71ae5a4SJacob Faibussowitsch { 3522e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3523bc6112feSHong Zhang 3524bc6112feSHong Zhang PetscFunctionBegin; 3525cf053153SJunchao Zhang *rinfo = ID_RINFO_GET(mumps->id, icntl); 35263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3527bc6112feSHong Zhang } 3528bc6112feSHong Zhang 352966976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 3530d71ae5a4SJacob Faibussowitsch { 3531e69c285eSBarry Smith Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3532bc6112feSHong Zhang 3533bc6112feSHong Zhang PetscFunctionBegin; 3534cf053153SJunchao Zhang *rinfog = ID_RINFOG_GET(mumps->id, icntl); 35353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3536bc6112feSHong Zhang } 3537bc6112feSHong Zhang 353866976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 35395c0bae8cSAshish Patel { 35405c0bae8cSAshish Patel Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 35415c0bae8cSAshish Patel 35425c0bae8cSAshish Patel PetscFunctionBegin; 35435c0bae8cSAshish Patel PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 35445c0bae8cSAshish Patel *size = 0; 35455c0bae8cSAshish Patel *array = NULL; 35465c0bae8cSAshish Patel if (!mumps->myid) { 35475c0bae8cSAshish Patel *size = mumps->id.INFOG(28); 35485c0bae8cSAshish Patel PetscCall(PetscMalloc1(*size, array)); 
35495c0bae8cSAshish Patel for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 35505c0bae8cSAshish Patel } 35515c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 35525c0bae8cSAshish Patel } 35535c0bae8cSAshish Patel 355466976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 3555d71ae5a4SJacob Faibussowitsch { 35560e6b8875SHong Zhang Mat Bt = NULL, Btseq = NULL; 35570e6b8875SHong Zhang PetscBool flg; 3558bb599dfdSHong Zhang Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3559bb599dfdSHong Zhang PetscScalar *aa; 3560f410b75aSHong Zhang PetscInt spnr, *ia, *ja, M, nrhs; 3561bb599dfdSHong Zhang 3562bb599dfdSHong Zhang PetscFunctionBegin; 35634f572ea9SToby Isaac PetscAssertPointer(spRHS, 2); 3564013e2dc7SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 356553587d93SPierre Jolivet PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 356653587d93SPierre Jolivet PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 35679566063dSJacob Faibussowitsch PetscCall(MatTransposeGetMat(spRHS, &Bt)); 3568bb599dfdSHong Zhang 35699566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 30, 1)); 3570bb599dfdSHong Zhang 35712d4298aeSJunchao Zhang if (mumps->petsc_size > 1) { 35720e6b8875SHong Zhang Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 35730e6b8875SHong Zhang Btseq = b->A; 35740e6b8875SHong Zhang } else { 35750e6b8875SHong Zhang Btseq = Bt; 35760e6b8875SHong Zhang } 35770e6b8875SHong Zhang 35789566063dSJacob Faibussowitsch PetscCall(MatGetSize(spRHS, &M, &nrhs)); 35796497c311SBarry Smith mumps->id.nrhs = (PetscMUMPSInt)nrhs; 35806497c311SBarry Smith 
PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs)); 3581f410b75aSHong Zhang mumps->id.rhs = NULL; 3582f410b75aSHong Zhang 3583e3f2db6aSHong Zhang if (!mumps->myid) { 35849566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 35859566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 358628b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 35879566063dSJacob Faibussowitsch PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3588cf053153SJunchao Zhang PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 3589e3f2db6aSHong Zhang } else { 3590e3f2db6aSHong Zhang mumps->id.irhs_ptr = NULL; 3591e3f2db6aSHong Zhang mumps->id.irhs_sparse = NULL; 3592e3f2db6aSHong Zhang mumps->id.nz_rhs = 0; 3593cf053153SJunchao Zhang if (mumps->id.rhs_sparse_len) { 3594cf053153SJunchao Zhang PetscCall(PetscFree(mumps->id.rhs_sparse)); 3595cf053153SJunchao Zhang mumps->id.rhs_sparse_len = 0; 3596cf053153SJunchao Zhang } 3597e3f2db6aSHong Zhang } 3598bb599dfdSHong Zhang mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3599e3f2db6aSHong Zhang mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3600bb599dfdSHong Zhang 3601bb599dfdSHong Zhang /* solve phase */ 3602bb599dfdSHong Zhang mumps->id.job = JOB_SOLVE; 36033ab56b82SJunchao Zhang PetscMUMPS_c(mumps); 36049261f6e4SBarry Smith PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 360514267174SHong Zhang 3606e3f2db6aSHong Zhang if (!mumps->myid) { 36079566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 36089566063dSJacob Faibussowitsch 
PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 360928b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3610e3f2db6aSHong Zhang } 36113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3612bb599dfdSHong Zhang } 3613bb599dfdSHong Zhang 3614bb599dfdSHong Zhang /*@ 36151d27aa22SBarry Smith MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc> 3616bb599dfdSHong Zhang 3617c3339decSBarry Smith Logically Collective 3618bb599dfdSHong Zhang 361920f4b53cSBarry Smith Input Parameter: 36200b4b7b1cSBarry Smith . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3621bb599dfdSHong Zhang 3622bb599dfdSHong Zhang Output Parameter: 362320f4b53cSBarry Smith . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3624bb599dfdSHong Zhang 3625bb599dfdSHong Zhang Level: beginner 3626bb599dfdSHong Zhang 36271cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3628bb599dfdSHong Zhang @*/ 3629d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3630d71ae5a4SJacob Faibussowitsch { 3631bb599dfdSHong Zhang PetscFunctionBegin; 3632bb599dfdSHong Zhang PetscValidType(F, 1); 363328b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3634cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 36353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3636bb599dfdSHong Zhang } 3637bb599dfdSHong Zhang 363866976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 
3639d71ae5a4SJacob Faibussowitsch { 36400e6b8875SHong Zhang Mat spRHS; 36410e6b8875SHong Zhang 36420e6b8875SHong Zhang PetscFunctionBegin; 36439566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(spRHST, &spRHS)); 36449566063dSJacob Faibussowitsch PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 36459566063dSJacob Faibussowitsch PetscCall(MatDestroy(&spRHS)); 36463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 36470e6b8875SHong Zhang } 36480e6b8875SHong Zhang 36490e6b8875SHong Zhang /*@ 36501d27aa22SBarry Smith MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc> 36510e6b8875SHong Zhang 3652c3339decSBarry Smith Logically Collective 36530e6b8875SHong Zhang 365420f4b53cSBarry Smith Input Parameter: 36550b4b7b1cSBarry Smith . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 36560e6b8875SHong Zhang 36570e6b8875SHong Zhang Output Parameter: 365820f4b53cSBarry Smith . 
spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 36590e6b8875SHong Zhang 36600e6b8875SHong Zhang Level: beginner 36610e6b8875SHong Zhang 36621cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 36630e6b8875SHong Zhang @*/ 3664d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3665d71ae5a4SJacob Faibussowitsch { 36660e6b8875SHong Zhang PetscBool flg; 36670e6b8875SHong Zhang 36680e6b8875SHong Zhang PetscFunctionBegin; 36690e6b8875SHong Zhang PetscValidType(F, 1); 367028b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 36719566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 367228b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3673cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 36743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 36750e6b8875SHong Zhang } 36760e6b8875SHong Zhang 367793d70b8aSPierre Jolivet static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 367893d70b8aSPierre Jolivet { 367993d70b8aSPierre Jolivet Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 368093d70b8aSPierre Jolivet 368193d70b8aSPierre Jolivet PetscFunctionBegin; 368293d70b8aSPierre Jolivet if (nblk) { 368393d70b8aSPierre Jolivet PetscAssertPointer(blkptr, 4); 368493d70b8aSPierre Jolivet PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk)); 368593d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkptr)); 368693d70b8aSPierre Jolivet PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr)); 368793d70b8aSPierre Jolivet for (PetscInt i = 0; i < nblk + 1; ++i) 
PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i)); 3688cf053153SJunchao Zhang // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15). 3689cf053153SJunchao Zhang // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS(). 3690cf053153SJunchao Zhang // mumps->id.ICNTL(15) = 1; 369193d70b8aSPierre Jolivet if (blkvar) { 369293d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkvar)); 369393d70b8aSPierre Jolivet PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar)); 369493d70b8aSPierre Jolivet for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i)); 369593d70b8aSPierre Jolivet } 369693d70b8aSPierre Jolivet } else { 369793d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkptr)); 369893d70b8aSPierre Jolivet PetscCall(PetscFree(mumps->id.blkvar)); 3699cf053153SJunchao Zhang // mumps->id.ICNTL(15) = 0; 3700cf053153SJunchao Zhang mumps->id.nblk = 0; 370193d70b8aSPierre Jolivet } 370293d70b8aSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 370393d70b8aSPierre Jolivet } 370493d70b8aSPierre Jolivet 370593d70b8aSPierre Jolivet /*@ 370693d70b8aSPierre Jolivet MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1` 370793d70b8aSPierre Jolivet 370893d70b8aSPierre Jolivet Not collective, only relevant on the first process of the MPI communicator 370993d70b8aSPierre Jolivet 371093d70b8aSPierre Jolivet Input Parameters: 371193d70b8aSPierre Jolivet + F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 371293d70b8aSPierre Jolivet . nblk - the number of blocks 371393d70b8aSPierre Jolivet . 
blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk` 371493d70b8aSPierre Jolivet - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks 371593d70b8aSPierre Jolivet 371693d70b8aSPierre Jolivet Level: advanced 371793d70b8aSPierre Jolivet 371893d70b8aSPierre Jolivet .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()` 371993d70b8aSPierre Jolivet @*/ 372093d70b8aSPierre Jolivet PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 372193d70b8aSPierre Jolivet { 372293d70b8aSPierre Jolivet PetscFunctionBegin; 372393d70b8aSPierre Jolivet PetscValidType(F, 1); 372493d70b8aSPierre Jolivet PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 372593d70b8aSPierre Jolivet PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr)); 372693d70b8aSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 372793d70b8aSPierre Jolivet } 372893d70b8aSPierre Jolivet 3729a21f80fcSHong Zhang /*@ 37301d27aa22SBarry Smith MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc> 3731a21f80fcSHong Zhang 3732c3339decSBarry Smith Logically Collective 3733a21f80fcSHong Zhang 3734a21f80fcSHong Zhang Input Parameters: 37350b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3736a21f80fcSHong Zhang - icntl - index of MUMPS parameter array INFO() 3737a21f80fcSHong Zhang 3738a21f80fcSHong Zhang Output Parameter: 3739a21f80fcSHong Zhang . 
ival - value of MUMPS INFO(icntl) 3740a21f80fcSHong Zhang 3741a21f80fcSHong Zhang Level: beginner 3742a21f80fcSHong Zhang 37431cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3744a21f80fcSHong Zhang @*/ 3745d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3746d71ae5a4SJacob Faibussowitsch { 3747bc6112feSHong Zhang PetscFunctionBegin; 37482989dfd4SHong Zhang PetscValidType(F, 1); 374928b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 37504f572ea9SToby Isaac PetscAssertPointer(ival, 3); 3751cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 37523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3753bc6112feSHong Zhang } 3754bc6112feSHong Zhang 3755a21f80fcSHong Zhang /*@ 37561d27aa22SBarry Smith MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc> 3757a21f80fcSHong Zhang 3758c3339decSBarry Smith Logically Collective 3759a21f80fcSHong Zhang 3760a21f80fcSHong Zhang Input Parameters: 37610b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3762a21f80fcSHong Zhang - icntl - index of MUMPS parameter array INFOG() 3763a21f80fcSHong Zhang 3764a21f80fcSHong Zhang Output Parameter: 3765a21f80fcSHong Zhang . 
ival - value of MUMPS INFOG(icntl) 3766a21f80fcSHong Zhang 3767a21f80fcSHong Zhang Level: beginner 3768a21f80fcSHong Zhang 37691cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3770a21f80fcSHong Zhang @*/ 3771d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3772d71ae5a4SJacob Faibussowitsch { 3773bc6112feSHong Zhang PetscFunctionBegin; 37742989dfd4SHong Zhang PetscValidType(F, 1); 377528b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 37764f572ea9SToby Isaac PetscAssertPointer(ival, 3); 3777cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 37783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3779bc6112feSHong Zhang } 3780bc6112feSHong Zhang 3781a21f80fcSHong Zhang /*@ 37821d27aa22SBarry Smith MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc> 3783a21f80fcSHong Zhang 3784c3339decSBarry Smith Logically Collective 3785a21f80fcSHong Zhang 3786a21f80fcSHong Zhang Input Parameters: 37870b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3788a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFO() 3789a21f80fcSHong Zhang 3790a21f80fcSHong Zhang Output Parameter: 3791a21f80fcSHong Zhang . 
val - value of MUMPS RINFO(icntl) 3792a21f80fcSHong Zhang 3793a21f80fcSHong Zhang Level: beginner 3794a21f80fcSHong Zhang 37951cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3796a21f80fcSHong Zhang @*/ 3797d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3798d71ae5a4SJacob Faibussowitsch { 3799bc6112feSHong Zhang PetscFunctionBegin; 38002989dfd4SHong Zhang PetscValidType(F, 1); 380128b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 38024f572ea9SToby Isaac PetscAssertPointer(val, 3); 3803cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 38043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3805bc6112feSHong Zhang } 3806bc6112feSHong Zhang 3807a21f80fcSHong Zhang /*@ 38081d27aa22SBarry Smith MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc> 3809a21f80fcSHong Zhang 3810c3339decSBarry Smith Logically Collective 3811a21f80fcSHong Zhang 3812a21f80fcSHong Zhang Input Parameters: 38130b4b7b1cSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3814a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFOG() 3815a21f80fcSHong Zhang 3816a21f80fcSHong Zhang Output Parameter: 3817a21f80fcSHong Zhang . 
val - value of MUMPS RINFOG(icntl) 3818a21f80fcSHong Zhang 3819a21f80fcSHong Zhang Level: beginner 3820a21f80fcSHong Zhang 38211cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3822a21f80fcSHong Zhang @*/ 3823d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3824d71ae5a4SJacob Faibussowitsch { 3825bc6112feSHong Zhang PetscFunctionBegin; 38262989dfd4SHong Zhang PetscValidType(F, 1); 382728b400f6SJacob Faibussowitsch PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 38284f572ea9SToby Isaac PetscAssertPointer(val, 3); 3829cac4c232SBarry Smith PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 38303ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3831bc6112feSHong Zhang } 3832bc6112feSHong Zhang 38335c0bae8cSAshish Patel /*@ 38341d27aa22SBarry Smith MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc> 38355c0bae8cSAshish Patel 38365c0bae8cSAshish Patel Logically Collective 38375c0bae8cSAshish Patel 38385c0bae8cSAshish Patel Input Parameter: 38390b4b7b1cSBarry Smith . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 38405c0bae8cSAshish Patel 38415c0bae8cSAshish Patel Output Parameters: 38420b4b7b1cSBarry Smith + size - local size of the array. The size of the array is non-zero only on MPI rank 0 38435c0bae8cSAshish Patel - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 38445c0bae8cSAshish Patel for freeing this array. 
38455c0bae8cSAshish Patel 38465c0bae8cSAshish Patel Level: beginner 38475c0bae8cSAshish Patel 38481cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 38495c0bae8cSAshish Patel @*/ 38505c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 38515c0bae8cSAshish Patel { 38525c0bae8cSAshish Patel PetscFunctionBegin; 38535c0bae8cSAshish Patel PetscValidType(F, 1); 38545c0bae8cSAshish Patel PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 38554f572ea9SToby Isaac PetscAssertPointer(size, 2); 38564f572ea9SToby Isaac PetscAssertPointer(array, 3); 38575c0bae8cSAshish Patel PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 38585c0bae8cSAshish Patel PetscFunctionReturn(PETSC_SUCCESS); 38595c0bae8cSAshish Patel } 38605c0bae8cSAshish Patel 386124b6179bSKris Buschelman /*MC 38622692d6eeSBarry Smith MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 38630b4b7b1cSBarry Smith MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc> 386424b6179bSKris Buschelman 386511a5261eSBarry Smith Works with `MATAIJ` and `MATSBAIJ` matrices 386624b6179bSKris Buschelman 3867c2b89b5dSBarry Smith Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3868c2b89b5dSBarry Smith 38692ef1f0ffSBarry Smith Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 38702ef1f0ffSBarry Smith See details below. 
3871217d3b1eSJunchao Zhang 38722ef1f0ffSBarry Smith Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3873c2b89b5dSBarry Smith 387424b6179bSKris Buschelman Options Database Keys: 38754422a9fcSPatrick Sanan + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 38764422a9fcSPatrick Sanan . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 38774422a9fcSPatrick Sanan . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 38784422a9fcSPatrick Sanan . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 38794422a9fcSPatrick Sanan . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3880b53c1a7fSBarry Smith . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3881b53c1a7fSBarry Smith Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 38824422a9fcSPatrick Sanan . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 38834422a9fcSPatrick Sanan . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 38844422a9fcSPatrick Sanan . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 38854422a9fcSPatrick Sanan . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 38864422a9fcSPatrick Sanan . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 38874422a9fcSPatrick Sanan . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 388845e3843bSPierre Jolivet . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 38894422a9fcSPatrick Sanan . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 389025aac85cSJunchao Zhang . 
-mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 38914422a9fcSPatrick Sanan . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 38924422a9fcSPatrick Sanan . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 38934422a9fcSPatrick Sanan . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 38944422a9fcSPatrick Sanan . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 38954422a9fcSPatrick Sanan . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3896fa6fd9d0SPierre Jolivet . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 38974422a9fcSPatrick Sanan . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 38984422a9fcSPatrick Sanan . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 38994422a9fcSPatrick Sanan . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 39004422a9fcSPatrick Sanan . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3901a0e18203SThibaut Appel . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3902a0e18203SThibaut Appel . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 390350ea2040Saszaboa . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB) 3904a0e18203SThibaut Appel . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3905c92b4f89SPierre Jolivet . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism 3906146931dbSPierre Jolivet . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 39074422a9fcSPatrick Sanan . 
-mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 39084422a9fcSPatrick Sanan . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 39094422a9fcSPatrick Sanan . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 39104422a9fcSPatrick Sanan . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3911217d3b1eSJunchao Zhang . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3912a0e18203SThibaut Appel . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3913217d3b1eSJunchao Zhang - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3914217d3b1eSJunchao Zhang Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 391524b6179bSKris Buschelman 391624b6179bSKris Buschelman Level: beginner 391724b6179bSKris Buschelman 391895452b02SPatrick Sanan Notes: 39191d27aa22SBarry Smith MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will 39202ef1f0ffSBarry Smith error if the matrix is Hermitian. 392138548759SBarry Smith 392226cc229bSBarry Smith When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 392326cc229bSBarry Smith `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 
392426cc229bSBarry Smith 39252ef1f0ffSBarry Smith When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 39262ef1f0ffSBarry Smith the failure with 39272ef1f0ffSBarry Smith .vb 39282ef1f0ffSBarry Smith KSPGetPC(ksp,&pc); 39292ef1f0ffSBarry Smith PCFactorGetMatrix(pc,&mat); 39302ef1f0ffSBarry Smith MatMumpsGetInfo(mat,....); 39312ef1f0ffSBarry Smith MatMumpsGetInfog(mat,....); etc. 39322ef1f0ffSBarry Smith .ve 39332ef1f0ffSBarry Smith Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 39349fc87aa7SBarry Smith 3935a5399872SJunchao Zhang MUMPS provides 64-bit integer support in two build modes: 3936a5399872SJunchao Zhang full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3937a5399872SJunchao Zhang requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 39388fcaa860SBarry Smith 3939a5399872SJunchao Zhang selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3940a5399872SJunchao Zhang MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3941a5399872SJunchao Zhang columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3942a5399872SJunchao Zhang integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3943a5399872SJunchao Zhang 3944a5399872SJunchao Zhang With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 
3945a5399872SJunchao Zhang 3946a5399872SJunchao Zhang Two modes to run MUMPS/PETSc with OpenMP 39472ef1f0ffSBarry Smith .vb 39480b4b7b1cSBarry Smith Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 39490b4b7b1cSBarry Smith threads per rank, then you may use "export `OMP_NUM_THREADS` = 16 && mpirun -n 4 ./test". 39502ef1f0ffSBarry Smith .ve 39518fcaa860SBarry Smith 39522ef1f0ffSBarry Smith .vb 39530b4b7b1cSBarry Smith `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example, 39542ef1f0ffSBarry Smith if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 39552ef1f0ffSBarry Smith .ve 39568fcaa860SBarry Smith 39578fcaa860SBarry Smith To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 39582ef1f0ffSBarry Smith (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 39592ef1f0ffSBarry Smith (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 39608fcaa860SBarry Smith libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 39610b4b7b1cSBarry Smith (PETSc will automatically try to utilized a threaded BLAS if `--with-openmp` is provided). 3962217d3b1eSJunchao Zhang 39638fcaa860SBarry Smith If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3964217d3b1eSJunchao Zhang processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3965217d3b1eSJunchao Zhang size m and create a communicator called omp_comm for each group. 
Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3966217d3b1eSJunchao Zhang are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 3967217d3b1eSJunchao Zhang by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3968217d3b1eSJunchao Zhang In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 3969217d3b1eSJunchao Zhang if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 39700b4b7b1cSBarry Smith MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3971217d3b1eSJunchao Zhang cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 39720b4b7b1cSBarry Smith problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding. 39730b4b7b1cSBarry Smith For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and 3974217d3b1eSJunchao Zhang examine the mapping result. 3975217d3b1eSJunchao Zhang 397611a5261eSBarry Smith PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 397711a5261eSBarry Smith for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 397811a5261eSBarry Smith calls `omp_set_num_threads`(m) internally before calling MUMPS. 
3979217d3b1eSJunchao Zhang 39801d27aa22SBarry Smith See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 3981217d3b1eSJunchao Zhang 398293d70b8aSPierre Jolivet .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()` 398324b6179bSKris Buschelman M*/ 398424b6179bSKris Buschelman 3985d2a308c1SPierre Jolivet static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 3986d71ae5a4SJacob Faibussowitsch { 398735bd34faSBarry Smith PetscFunctionBegin; 39882692d6eeSBarry Smith *type = MATSOLVERMUMPS; 39893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 399035bd34faSBarry Smith } 399135bd34faSBarry Smith 3992bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI AIJ matrices */ 3993d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3994d71ae5a4SJacob Faibussowitsch { 39952877fffaSHong Zhang Mat B; 39962877fffaSHong Zhang Mat_MUMPS *mumps; 39974b9405b2SPierre Jolivet PetscBool isSeqAIJ, isDiag, isDense; 39982c7c0729SBarry Smith PetscMPIInt size; 39992877fffaSHong Zhang 40002877fffaSHong Zhang PetscFunctionBegin; 4001eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 400203e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 400303e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 400403e5aca4SStefano Zampini *F = NULL; 400503e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 400603e5aca4SStefano Zampini } 4007eb1ec7c1SStefano Zampini #endif 40082877fffaSHong Zhang /* Create the factorization matrix */ 40099566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 4010c3e1b152SPierre Jolivet PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 40114b9405b2SPierre Jolivet PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 40129566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 40139566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4014d2a308c1SPierre Jolivet PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 40159566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 40162877fffaSHong Zhang 40174dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 40182205254eSKarl Rupp 40192877fffaSHong Zhang B->ops->view = MatView_MUMPS; 402035bd34faSBarry Smith B->ops->getinfo = MatGetInfo_MUMPS; 40212205254eSKarl Rupp 40229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 40239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 40249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 40259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 40269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 40279566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 40289566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 40299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 40309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 40319566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 40329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 40335c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 40349566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 40359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 403693d70b8aSPierre Jolivet PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 40376444a565SStefano Zampini 4038450b117fSShri Abhyankar if (ftype == MAT_FACTOR_LU) { 4039450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4040d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 4041bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 4042c3e1b152SPierre Jolivet else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 40434b9405b2SPierre Jolivet else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4044bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 40459566063dSJacob Faibussowitsch 
PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4046746480a1SHong Zhang mumps->sym = 0; 4047dcd589f8SShri Abhyankar } else { 404867877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4049450b117fSShri Abhyankar B->factortype = MAT_FACTOR_CHOLESKY; 4050bccb9932SShri Abhyankar if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 4051c3e1b152SPierre Jolivet else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 40524b9405b2SPierre Jolivet else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4053bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 40549566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 405559ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 405659ac8732SStefano Zampini mumps->sym = 2; 405759ac8732SStefano Zampini #else 4058b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 40596fdc2a6dSBarry Smith else mumps->sym = 2; 406059ac8732SStefano Zampini #endif 4061450b117fSShri Abhyankar } 40622877fffaSHong Zhang 406300c67f3bSHong Zhang /* set solvertype */ 40649566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 40659566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 40669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 40672c7c0729SBarry Smith if (size == 1) { 40684ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4069f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 40702c7c0729SBarry Smith } 40712877fffaSHong Zhang B->ops->destroy = MatDestroy_MUMPS; 4072e69c285eSBarry Smith B->data = (void *)mumps; 40732205254eSKarl Rupp 40742877fffaSHong Zhang *F = B; 4075413bcc21SPierre 
Jolivet mumps->id.job = JOB_NULL; 4076413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 4077413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 4078d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 40793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 40802877fffaSHong Zhang } 40812877fffaSHong Zhang 4082bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI SBAIJ matrices */ 4083d2a308c1SPierre Jolivet static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 4084d71ae5a4SJacob Faibussowitsch { 40852877fffaSHong Zhang Mat B; 40862877fffaSHong Zhang Mat_MUMPS *mumps; 4087ace3abfcSBarry Smith PetscBool isSeqSBAIJ; 40882c7c0729SBarry Smith PetscMPIInt size; 40892877fffaSHong Zhang 40902877fffaSHong Zhang PetscFunctionBegin; 4091eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 409203e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 409303e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 409403e5aca4SStefano Zampini *F = NULL; 409503e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 409603e5aca4SStefano Zampini } 4097eb1ec7c1SStefano Zampini #endif 40989566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 40999566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4100d2a308c1SPierre Jolivet PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 41019566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 4102e69c285eSBarry Smith 41034dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 41049566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 4105bccb9932SShri Abhyankar if (isSeqSBAIJ) { 410616ebf90aSShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 4107dcd589f8SShri Abhyankar } else { 4108bccb9932SShri Abhyankar mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 4109bccb9932SShri Abhyankar } 4110bccb9932SShri Abhyankar 411167877ebaSShri Abhyankar B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4112bccb9932SShri Abhyankar B->ops->view = MatView_MUMPS; 4113722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 41142205254eSKarl Rupp 41159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 41169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 41179566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 41189566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 41199566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 41209566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 41219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 41229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 41239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 41249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 41259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 41265c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 41279566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 41289566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 412993d70b8aSPierre Jolivet PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 41302205254eSKarl Rupp 4131f4762488SHong Zhang B->factortype = MAT_FACTOR_CHOLESKY; 413259ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX) 413359ac8732SStefano Zampini mumps->sym = 2; 413459ac8732SStefano Zampini #else 4135b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 41366fdc2a6dSBarry Smith else mumps->sym = 2; 413759ac8732SStefano Zampini #endif 4138a214ac2aSShri Abhyankar 413900c67f3bSHong Zhang /* set solvertype */ 41409566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 
41419566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 41429566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 41432c7c0729SBarry Smith if (size == 1) { 41444ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4145f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 41462c7c0729SBarry Smith } 41479566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4148f3c0ef26SHong Zhang B->ops->destroy = MatDestroy_MUMPS; 4149e69c285eSBarry Smith B->data = (void *)mumps; 41502205254eSKarl Rupp 41512877fffaSHong Zhang *F = B; 4152413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 4153413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 4154413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 4155d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 41563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 41572877fffaSHong Zhang } 415897969023SHong Zhang 4159d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 4160d71ae5a4SJacob Faibussowitsch { 416167877ebaSShri Abhyankar Mat B; 416267877ebaSShri Abhyankar Mat_MUMPS *mumps; 4163ace3abfcSBarry Smith PetscBool isSeqBAIJ; 41642c7c0729SBarry Smith PetscMPIInt size; 416567877ebaSShri Abhyankar 416667877ebaSShri Abhyankar PetscFunctionBegin; 416767877ebaSShri Abhyankar /* Create the factorization matrix */ 41689566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 41699566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 41709566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4171d2a308c1SPierre Jolivet PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 
41729566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 4173450b117fSShri Abhyankar 41744dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 4175966bd95aSPierre Jolivet PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 4176450b117fSShri Abhyankar B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 4177450b117fSShri Abhyankar B->factortype = MAT_FACTOR_LU; 4178bccb9932SShri Abhyankar if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 4179bccb9932SShri Abhyankar else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 4180746480a1SHong Zhang mumps->sym = 0; 41819566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4182bccb9932SShri Abhyankar 4183450b117fSShri Abhyankar B->ops->view = MatView_MUMPS; 4184722b6324SPierre Jolivet B->ops->getinfo = MatGetInfo_MUMPS; 41852205254eSKarl Rupp 41869566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 41879566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 41889566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 41899566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 41909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 41919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 41929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", 
MatMumpsGetCntl_MUMPS)); 41939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 41949566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 41959566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 41969566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 41975c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 41989566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 41999566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 420093d70b8aSPierre Jolivet PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4201450b117fSShri Abhyankar 420200c67f3bSHong Zhang /* set solvertype */ 42039566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 42049566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 42059566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 42062c7c0729SBarry Smith if (size == 1) { 42074ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4208f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 42092c7c0729SBarry Smith } 42107ee00b23SStefano Zampini B->ops->destroy = MatDestroy_MUMPS; 42117ee00b23SStefano Zampini B->data = (void *)mumps; 42127ee00b23SStefano Zampini 42137ee00b23SStefano Zampini *F = B; 4214413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 4215413bcc21SPierre Jolivet 
mumps->ICNTL_pre = NULL; 4216413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 4217d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 42183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 42197ee00b23SStefano Zampini } 42207ee00b23SStefano Zampini 42217ee00b23SStefano Zampini /* MatGetFactor for Seq and MPI SELL matrices */ 4222d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 4223d71ae5a4SJacob Faibussowitsch { 42247ee00b23SStefano Zampini Mat B; 42257ee00b23SStefano Zampini Mat_MUMPS *mumps; 42267ee00b23SStefano Zampini PetscBool isSeqSELL; 42272c7c0729SBarry Smith PetscMPIInt size; 42287ee00b23SStefano Zampini 42297ee00b23SStefano Zampini PetscFunctionBegin; 42307ee00b23SStefano Zampini /* Create the factorization matrix */ 42319566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 42329566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 42339566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4234d2a308c1SPierre Jolivet PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 42359566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 42367ee00b23SStefano Zampini 42374dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mumps)); 42387ee00b23SStefano Zampini 42397ee00b23SStefano Zampini B->ops->view = MatView_MUMPS; 42407ee00b23SStefano Zampini B->ops->getinfo = MatGetInfo_MUMPS; 42417ee00b23SStefano Zampini 42429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 42439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 42449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", 
MatFactorCreateSchurComplement_MUMPS)); 42459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 42469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 42479566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 42489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 42499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 42509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 42519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 42529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 42535c0bae8cSAshish Patel PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 42547ee00b23SStefano Zampini 4255966bd95aSPierre Jolivet PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 42567ee00b23SStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 42577ee00b23SStefano Zampini B->factortype = MAT_FACTOR_LU; 4258966bd95aSPierre Jolivet PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4259966bd95aSPierre Jolivet mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 42607ee00b23SStefano Zampini mumps->sym = 0; 42619566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 42627ee00b23SStefano Zampini 42637ee00b23SStefano 
Zampini /* set solvertype */ 42649566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 42659566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 42669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 42672c7c0729SBarry Smith if (size == 1) { 42684ac6704cSBarry Smith /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4269f73b0415SBarry Smith B->canuseordering = PETSC_TRUE; 42702c7c0729SBarry Smith } 4271450b117fSShri Abhyankar B->ops->destroy = MatDestroy_MUMPS; 4272e69c285eSBarry Smith B->data = (void *)mumps; 42732205254eSKarl Rupp 4274450b117fSShri Abhyankar *F = B; 4275413bcc21SPierre Jolivet mumps->id.job = JOB_NULL; 4276413bcc21SPierre Jolivet mumps->ICNTL_pre = NULL; 4277413bcc21SPierre Jolivet mumps->CNTL_pre = NULL; 4278d47f36abSHong Zhang mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 42793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4280450b117fSShri Abhyankar } 428142c9c57cSBarry Smith 42829d0448ceSStefano Zampini /* MatGetFactor for MATNEST matrices */ 42839d0448ceSStefano Zampini static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 42849d0448ceSStefano Zampini { 42859d0448ceSStefano Zampini Mat B, **mats; 42869d0448ceSStefano Zampini Mat_MUMPS *mumps; 42879d0448ceSStefano Zampini PetscInt nr, nc; 42889d0448ceSStefano Zampini PetscMPIInt size; 428903e5aca4SStefano Zampini PetscBool flg = PETSC_TRUE; 42909d0448ceSStefano Zampini 42919d0448ceSStefano Zampini PetscFunctionBegin; 42929d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX) 429303e5aca4SStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 429403e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 429503e5aca4SStefano Zampini *F = NULL; 429603e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 429703e5aca4SStefano Zampini } 42989d0448ceSStefano Zampini #endif 42999d0448ceSStefano Zampini 430003e5aca4SStefano Zampini /* Return if some condition is not satisfied */ 430103e5aca4SStefano Zampini *F = NULL; 43029d0448ceSStefano Zampini PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 43039d0448ceSStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY) { 43049d0448ceSStefano Zampini IS *rows, *cols; 43059d0448ceSStefano Zampini PetscInt *m, *M; 43069d0448ceSStefano Zampini 43079d0448ceSStefano Zampini PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 43089d0448ceSStefano Zampini PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 43099d0448ceSStefano Zampini PetscCall(MatNestGetISs(A, rows, cols)); 43109d0448ceSStefano Zampini for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 431103e5aca4SStefano Zampini if (!flg) { 431203e5aca4SStefano Zampini PetscCall(PetscFree2(rows, cols)); 431303e5aca4SStefano Zampini PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. 
Use MAT_FACTOR_LU.\n")); 431403e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 431503e5aca4SStefano Zampini } 43169d0448ceSStefano Zampini PetscCall(PetscMalloc2(nr, &m, nr, &M)); 43179d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 43189d0448ceSStefano Zampini for (PetscInt r = 0; flg && r < nr; r++) 43199d0448ceSStefano Zampini for (PetscInt k = r + 1; flg && k < nr; k++) 43209d0448ceSStefano Zampini if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 43219d0448ceSStefano Zampini PetscCall(PetscFree2(m, M)); 43229d0448ceSStefano Zampini PetscCall(PetscFree2(rows, cols)); 432303e5aca4SStefano Zampini if (!flg) { 432403e5aca4SStefano Zampini PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 432503e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 432603e5aca4SStefano Zampini } 43279d0448ceSStefano Zampini } 43289d0448ceSStefano Zampini 43299d0448ceSStefano Zampini for (PetscInt r = 0; r < nr; r++) { 43309d0448ceSStefano Zampini for (PetscInt c = 0; c < nc; c++) { 43319d0448ceSStefano Zampini Mat sub = mats[r][c]; 433253587d93SPierre Jolivet PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 43339d0448ceSStefano Zampini 43349d0448ceSStefano Zampini if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 433553587d93SPierre Jolivet PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 43369d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 43379d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 43389d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 43399d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 
43409d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 43419d0448ceSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 4342c3e1b152SPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 43434b9405b2SPierre Jolivet PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 43449d0448ceSStefano Zampini if (ftype == MAT_FACTOR_CHOLESKY) { 4345dcab004fSPierre Jolivet if (r == c) { 43464b9405b2SPierre Jolivet if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) { 434740afc089SBarry Smith PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 434803e5aca4SStefano Zampini flg = PETSC_FALSE; 4349dcab004fSPierre Jolivet } 43504b9405b2SPierre Jolivet } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 435140afc089SBarry Smith PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 435203e5aca4SStefano Zampini flg = PETSC_FALSE; 435303e5aca4SStefano Zampini } 43544b9405b2SPierre Jolivet } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 43559afb9c56SPierre Jolivet PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 435603e5aca4SStefano Zampini flg = PETSC_FALSE; 43579d0448ceSStefano Zampini } 43589d0448ceSStefano Zampini } 435903e5aca4SStefano Zampini } 436003e5aca4SStefano Zampini if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 43619d0448ceSStefano Zampini 43629d0448ceSStefano Zampini /* Create the factorization matrix */ 43639d0448ceSStefano Zampini PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 43649d0448ceSStefano Zampini PetscCall(MatSetSizes(B, 
A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 43659d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 43669d0448ceSStefano Zampini PetscCall(MatSetUp(B)); 43679d0448ceSStefano Zampini 43689d0448ceSStefano Zampini PetscCall(PetscNew(&mumps)); 43699d0448ceSStefano Zampini 43709d0448ceSStefano Zampini B->ops->view = MatView_MUMPS; 43719d0448ceSStefano Zampini B->ops->getinfo = MatGetInfo_MUMPS; 43729d0448ceSStefano Zampini 43739d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 43749d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 43759d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 43769d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 43779d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 43789d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 43799d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 43809d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 43819d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 43829d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 43839d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 43849d0448ceSStefano Zampini 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 43859d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 43869d0448ceSStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 438793d70b8aSPierre Jolivet PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 43889d0448ceSStefano Zampini 43899d0448ceSStefano Zampini if (ftype == MAT_FACTOR_LU) { 43909d0448ceSStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 43919d0448ceSStefano Zampini B->factortype = MAT_FACTOR_LU; 43929d0448ceSStefano Zampini mumps->sym = 0; 43939d0448ceSStefano Zampini } else { 43949d0448ceSStefano Zampini B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 43959d0448ceSStefano Zampini B->factortype = MAT_FACTOR_CHOLESKY; 43969d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX) 43979d0448ceSStefano Zampini mumps->sym = 2; 43989d0448ceSStefano Zampini #else 43999d0448ceSStefano Zampini if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 44009d0448ceSStefano Zampini else mumps->sym = 2; 44019d0448ceSStefano Zampini #endif 44029d0448ceSStefano Zampini } 44039d0448ceSStefano Zampini mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 44049d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 44059d0448ceSStefano Zampini 44069d0448ceSStefano Zampini PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 44079d0448ceSStefano Zampini if (size == 1) { 44089d0448ceSStefano Zampini /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 44099d0448ceSStefano Zampini B->canuseordering = PETSC_TRUE; 44109d0448ceSStefano Zampini } 44119d0448ceSStefano Zampini 44129d0448ceSStefano 
Zampini /* set solvertype */ 44139d0448ceSStefano Zampini PetscCall(PetscFree(B->solvertype)); 44149d0448ceSStefano Zampini PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 44159d0448ceSStefano Zampini B->ops->destroy = MatDestroy_MUMPS; 44169d0448ceSStefano Zampini B->data = (void *)mumps; 44179d0448ceSStefano Zampini 44189d0448ceSStefano Zampini *F = B; 44199d0448ceSStefano Zampini mumps->id.job = JOB_NULL; 44209d0448ceSStefano Zampini mumps->ICNTL_pre = NULL; 44219d0448ceSStefano Zampini mumps->CNTL_pre = NULL; 44229d0448ceSStefano Zampini mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 44239d0448ceSStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 44249d0448ceSStefano Zampini } 44259d0448ceSStefano Zampini 4426d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 4427d71ae5a4SJacob Faibussowitsch { 442842c9c57cSBarry Smith PetscFunctionBegin; 44299566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 44309566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 44319566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 44329566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 44339566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 44349566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 44359566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 44369566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, 
MatGetFactor_baij_mumps)); 44379566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 44389566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 44399566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 4440c3e1b152SPierre Jolivet PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4441c3e1b152SPierre Jolivet PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 44424b9405b2SPierre Jolivet PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 44434b9405b2SPierre Jolivet PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 44444b9405b2SPierre Jolivet PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 44454b9405b2SPierre Jolivet PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 44469d0448ceSStefano Zampini PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 44479d0448ceSStefano Zampini PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 44483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 444942c9c57cSBarry Smith } 4450