xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 7cd49bdee00a26e142bff6e7df7c3fb9209fa795)
1397b6df1SKris Buschelman /*
2c2b5dc30SHong Zhang     Provides an interface to the MUMPS sparse solver
3397b6df1SKris Buschelman */
467602552SJunchao Zhang #include <petscpkg_version.h>
59d0448ceSStefano Zampini #include <petscsf.h>
6c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
87ee00b23SStefano Zampini #include <../src/mat/impls/sell/mpi/mpisell.h>
9cf053153SJunchao Zhang #include <petsc/private/vecimpl.h>
10397b6df1SKris Buschelman 
/* Text fragment that points the reader at the "Error and warning diagnostics" part of the MUMPS users manual.
   Presumably appended to error messages elsewhere in this file (the uses are outside this excerpt). */
#define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
129261f6e4SBarry Smith 
13397b6df1SKris Buschelman EXTERN_C_BEGIN
14cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
15cf053153SJunchao Zhang   #include <cmumps_c.h>
16cf053153SJunchao Zhang   #include <zmumps_c.h>
17cf053153SJunchao Zhang   #include <smumps_c.h>
18cf053153SJunchao Zhang   #include <dmumps_c.h>
19cf053153SJunchao Zhang #else
20397b6df1SKris Buschelman   #if defined(PETSC_USE_COMPLEX)
212907cef9SHong Zhang     #if defined(PETSC_USE_REAL_SINGLE)
222907cef9SHong Zhang       #include <cmumps_c.h>
23cf053153SJunchao Zhang       #define MUMPS_c       cmumps_c
24cf053153SJunchao Zhang       #define MUMPS_STRUC_C CMUMPS_STRUC_C
25cf053153SJunchao Zhang       #define MumpsScalar   CMUMPS_COMPLEX
262907cef9SHong Zhang     #else
27c6db04a5SJed Brown       #include <zmumps_c.h>
28cf053153SJunchao Zhang       #define MUMPS_c       zmumps_c
29cf053153SJunchao Zhang       #define MUMPS_STRUC_C ZMUMPS_STRUC_C
30cf053153SJunchao Zhang       #define MumpsScalar   ZMUMPS_COMPLEX
312907cef9SHong Zhang     #endif
322907cef9SHong Zhang   #else
332907cef9SHong Zhang     #if defined(PETSC_USE_REAL_SINGLE)
342907cef9SHong Zhang       #include <smumps_c.h>
35cf053153SJunchao Zhang       #define MUMPS_c       smumps_c
36cf053153SJunchao Zhang       #define MUMPS_STRUC_C SMUMPS_STRUC_C
37cf053153SJunchao Zhang       #define MumpsScalar   SMUMPS_REAL
38397b6df1SKris Buschelman     #else
39c6db04a5SJed Brown       #include <dmumps_c.h>
40cf053153SJunchao Zhang       #define MUMPS_c       dmumps_c
41cf053153SJunchao Zhang       #define MUMPS_STRUC_C DMUMPS_STRUC_C
42cf053153SJunchao Zhang       #define MumpsScalar   DMUMPS_REAL
43cf053153SJunchao Zhang     #endif
44397b6df1SKris Buschelman   #endif
452907cef9SHong Zhang #endif
46397b6df1SKris Buschelman EXTERN_C_END
47cf053153SJunchao Zhang 
/* Values assigned to the 'job' field of the MUMPS structure, listed in numerical order.
   Negative values are parenthesized so that each macro expands to a single primary expression. */
#define JOB_END          (-2)
#define JOB_INIT         (-1)
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
543d472b54SHong Zhang 
55a6053eceSJunchao Zhang /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
56a6053eceSJunchao Zhang    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
57a6053eceSJunchao Zhang    naming convention in PetscMPIInt, PetscBLASInt etc.
58a6053eceSJunchao Zhang */
59a6053eceSJunchao Zhang typedef MUMPS_INT PetscMUMPSInt;
60a6053eceSJunchao Zhang 
6167602552SJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
6267602552SJunchao Zhang   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
63f0b74427SPierre Jolivet     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
6467602552SJunchao Zhang   #endif
65a6053eceSJunchao Zhang #else
6667602552SJunchao Zhang   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
67f0b74427SPierre Jolivet     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
6867602552SJunchao Zhang   #endif
6967602552SJunchao Zhang #endif
7067602552SJunchao Zhang 
/* MPI datatype and value range matching PetscMUMPSInt (a 32-bit int, since full 64-bit MUMPS is rejected above).

   The minimum is written as (-2147483647 - 1): the literal 2147483648 does not fit in an int, so
   "-2147483648" would be a unary minus applied to a long/long long constant and the macro would not
   have type int. The parentheses keep the expansion a single expression. */
#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN (-2147483647 - 1)
74a6053eceSJunchao Zhang 
75a6053eceSJunchao Zhang /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
766497c311SBarry Smith static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
77d71ae5a4SJacob Faibussowitsch {
78a6053eceSJunchao Zhang   PetscFunctionBegin;
79ece88022SPierre Jolivet #if PetscDefined(USE_64BIT_INDICES)
802c71b3e2SJacob Faibussowitsch   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
81ece88022SPierre Jolivet #endif
8257508eceSPierre Jolivet   *b = (PetscMUMPSInt)a;
833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
84a6053eceSJunchao Zhang }
85a6053eceSJunchao Zhang 
86a6053eceSJunchao Zhang /* Put these utility routines here since they are only used in this file */
87ce78bad3SBarry Smith static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
88d71ae5a4SJacob Faibussowitsch {
89a6053eceSJunchao Zhang   PetscInt  myval;
90a6053eceSJunchao Zhang   PetscBool myset;
914d86920dSPierre Jolivet 
92a6053eceSJunchao Zhang   PetscFunctionBegin;
93a6053eceSJunchao Zhang   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
949566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
959566063dSJacob Faibussowitsch   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
96a6053eceSJunchao Zhang   if (set) *set = myset;
973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
98a6053eceSJunchao Zhang }
99a6053eceSJunchao Zhang #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
100a6053eceSJunchao Zhang 
101cf053153SJunchao Zhang // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
102cf053153SJunchao Zhang //
103cf053153SJunchao Zhang // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
104cf053153SJunchao Zhang // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size
105cf053153SJunchao Zhang // arrays within the types are directly linked. At the end, we only need to copy ~20 intergers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall().
106cf053153SJunchao Zhang //
107cf053153SJunchao Zhang // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
108cf053153SJunchao Zhang // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the the actual precision to be used.
109cf053153SJunchao Zhang // Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers.
110cf053153SJunchao Zhang typedef struct {
111cf053153SJunchao Zhang   PetscPrecision precision;   // precision used by MUMPS
112cf053153SJunchao Zhang   void          *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex
113cf053153SJunchao Zhang 
114cf053153SJunchao Zhang   // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
115cf053153SJunchao Zhang   MUMPS_INT  sym, par, job;
116cf053153SJunchao Zhang   MUMPS_INT  comm_fortran; /* Fortran communicator */
117cf053153SJunchao Zhang   MUMPS_INT *icntl;
118cf053153SJunchao Zhang   void      *cntl; // MumpsReal, fixed size array
119cf053153SJunchao Zhang   MUMPS_INT  n;
120cf053153SJunchao Zhang   MUMPS_INT  nblk;
121cf053153SJunchao Zhang 
122cf053153SJunchao Zhang   /* Assembled entry */
123cf053153SJunchao Zhang   MUMPS_INT8 nnz;
124cf053153SJunchao Zhang   MUMPS_INT *irn;
125cf053153SJunchao Zhang   MUMPS_INT *jcn;
126cf053153SJunchao Zhang   void      *a; // MumpsScalar, centralized input
127cf053153SJunchao Zhang   PetscCount a_len;
128cf053153SJunchao Zhang 
129cf053153SJunchao Zhang   /* Distributed entry */
130cf053153SJunchao Zhang   MUMPS_INT8 nnz_loc;
131cf053153SJunchao Zhang   MUMPS_INT *irn_loc;
132cf053153SJunchao Zhang   MUMPS_INT *jcn_loc;
133cf053153SJunchao Zhang   void      *a_loc; // MumpsScalar, distributed input
134cf053153SJunchao Zhang   PetscCount a_loc_len;
135cf053153SJunchao Zhang 
136cf053153SJunchao Zhang   /* Matrix by blocks */
137cf053153SJunchao Zhang   MUMPS_INT *blkptr;
138cf053153SJunchao Zhang   MUMPS_INT *blkvar;
139cf053153SJunchao Zhang 
140cf053153SJunchao Zhang   /* Ordering, if given by user */
141cf053153SJunchao Zhang   MUMPS_INT *perm_in;
142cf053153SJunchao Zhang 
143cf053153SJunchao Zhang   /* RHS, solution, ouptput data and statistics */
144cf053153SJunchao Zhang   void      *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc;                 // MumpsScalar buffers
145cf053153SJunchao Zhang   PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar
146cf053153SJunchao Zhang 
147cf053153SJunchao Zhang   MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
1487096bf6aSJunchao Zhang   MUMPS_INT  nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
1497096bf6aSJunchao Zhang   // MUMPS_INT  nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
150cf053153SJunchao Zhang   MUMPS_INT  schur_lld;
151cf053153SJunchao Zhang   MUMPS_INT *info, *infog;   // fixed size array
152cf053153SJunchao Zhang   void      *rinfo, *rinfog; // MumpsReal, fixed size array
153cf053153SJunchao Zhang 
154cf053153SJunchao Zhang   /* Null space */
155cf053153SJunchao Zhang   MUMPS_INT *pivnul_list; // allocated by MUMPS!
156cf053153SJunchao Zhang   MUMPS_INT *mapping;     // allocated by MUMPS!
157cf053153SJunchao Zhang 
158cf053153SJunchao Zhang   /* Schur */
159cf053153SJunchao Zhang   MUMPS_INT  size_schur;
160cf053153SJunchao Zhang   MUMPS_INT *listvar_schur;
161cf053153SJunchao Zhang   void      *schur; // MumpsScalar
162cf053153SJunchao Zhang   PetscCount schur_len;
163cf053153SJunchao Zhang 
164cf053153SJunchao Zhang   /* For out-of-core */
165cf053153SJunchao Zhang   char *ooc_tmpdir; // fixed size array
166cf053153SJunchao Zhang   char *ooc_prefix; // fixed size array
167cf053153SJunchao Zhang } XMUMPS_STRUC_C;
168cf053153SJunchao Zhang 
// Allocate (zeroed, via PetscNew) an inner MUMPS struct of type MUMPS_STRUC_T and attach it to <outer>.
// The fixed-size arrays are members of the inner struct; the corresponding pointers of the outer struct are aimed at them,
// so those never need to be copied. Must be used in a function returning PetscErrorCode since it uses PetscCall().
#define AllocatInternalID(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *typed_id; \
    PetscCall(PetscNew(&typed_id)); \
    (outer)->internal_id = typed_id; \
    /* these three fields should never change after init, so they are copied only once, here */ \
    typed_id->sym          = (outer)->sym; \
    typed_id->par          = (outer)->par; \
    typed_id->comm_fortran = (outer)->comm_fortran; \
    /* link the fixed-size arrays of the inner struct into the outer one */ \
    (outer)->icntl      = typed_id->icntl; \
    (outer)->cntl       = typed_id->cntl; \
    (outer)->info       = typed_id->info; \
    (outer)->infog      = typed_id->infog; \
    (outer)->rinfo      = typed_id->rinfo; \
    (outer)->rinfog     = typed_id->rinfog; \
    (outer)->ooc_tmpdir = typed_id->ooc_tmpdir; \
    (outer)->ooc_prefix = typed_id->ooc_prefix; \
  } while (0)
188cf053153SJunchao Zhang 
189cf053153SJunchao Zhang // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
190cf053153SJunchao Zhang static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
191cf053153SJunchao Zhang {
192cf053153SJunchao Zhang   PetscFunctionBegin;
193cf053153SJunchao Zhang   outer->precision = precision;
194cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
195cf053153SJunchao Zhang   #if defined(PETSC_USE_COMPLEX)
196cf053153SJunchao Zhang   if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer);
197cf053153SJunchao Zhang   else AllocatInternalID(ZMUMPS_STRUC_C, outer);
198cf053153SJunchao Zhang   #else
199cf053153SJunchao Zhang   if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer);
200cf053153SJunchao Zhang   else AllocatInternalID(DMUMPS_STRUC_C, outer);
201cf053153SJunchao Zhang   #endif
202cf053153SJunchao Zhang #else
203cf053153SJunchao Zhang   AllocatInternalID(MUMPS_STRUC_C, outer);
204cf053153SJunchao Zhang #endif
205cf053153SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
206cf053153SJunchao Zhang }
207cf053153SJunchao Zhang 
// Free the MumpsScalar buffers referenced by the inner MUMPS struct (of type MUMPS_STRUC_T) attached to <outer>.
// The pointers are read from the inner struct, i.e. they are whatever was last copied there (see PreMumpsCall()).
// Must be used in a function returning PetscErrorCode since it uses PetscCall().
#define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *typed_id = (MUMPS_STRUC_T *)(outer)->internal_id; \
    /* matrix values */ \
    PetscCall(PetscFree(typed_id->a)); \
    PetscCall(PetscFree(typed_id->a_loc)); \
    /* right-hand side and solution buffers */ \
    PetscCall(PetscFree(typed_id->redrhs)); \
    PetscCall(PetscFree(typed_id->rhs)); \
    PetscCall(PetscFree(typed_id->rhs_sparse)); \
    PetscCall(PetscFree(typed_id->rhs_loc)); \
    PetscCall(PetscFree(typed_id->sol_loc)); \
    /* Schur complement */ \
    PetscCall(PetscFree(typed_id->schur)); \
  } while (0)
220cf053153SJunchao Zhang 
221cf053153SJunchao Zhang static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
222cf053153SJunchao Zhang {
223cf053153SJunchao Zhang   PetscFunctionBegin;
224cf053153SJunchao Zhang   if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer
225cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
226cf053153SJunchao Zhang     const PetscPrecision mumps_precision = outer->precision;
227cf053153SJunchao Zhang     if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
228cf053153SJunchao Zhang   #if defined(PETSC_USE_COMPLEX)
229cf053153SJunchao Zhang       if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
230cf053153SJunchao Zhang       else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
231cf053153SJunchao Zhang   #else
232cf053153SJunchao Zhang       if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
233cf053153SJunchao Zhang       else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
234cf053153SJunchao Zhang   #endif
235cf053153SJunchao Zhang     }
236cf053153SJunchao Zhang #endif
237cf053153SJunchao Zhang     PetscCall(PetscFree(outer->internal_id));
238cf053153SJunchao Zhang   }
239cf053153SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
240cf053153SJunchao Zhang }
241cf053153SJunchao Zhang 
242cf053153SJunchao Zhang // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>.
243cf053153SJunchao Zhang // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
244cf053153SJunchao Zhang // (For bravity, we use <ma> for array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>)
245cf053153SJunchao Zhang // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
246cf053153SJunchao Zhang //
247cf053153SJunchao Zhang // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same.
248cf053153SJunchao Zhang // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
249cf053153SJunchao Zhang // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
250cf053153SJunchao Zhang //
251cf053153SJunchao Zhang //
252cf053153SJunchao Zhang //   Input parameters:
253cf053153SJunchao Zhang // + convert   - whether to do PetscScalar to MumpsScalar conversion
254cf053153SJunchao Zhang // . n         - length of the PetscScalar array
255cf053153SJunchao Zhang // . pa        - [n]], points to the PetscScalar array
256cf053153SJunchao Zhang // . precision - precision of MumpsScalar
257cf053153SJunchao Zhang // . m         - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero.
258cf053153SJunchao Zhang // - ma        - on input, an existing MumpsScalar array if any.
259cf053153SJunchao Zhang //
260cf053153SJunchao Zhang //   Output parameters:
261cf053153SJunchao Zhang // + m  - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
262cf053153SJunchao Zhang // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same.
263cf053153SJunchao Zhang //
264cf053153SJunchao Zhang //   Note:
265cf053153SJunchao Zhang //    New memory, if allocated, is done via PetscMalloc1(), and is owned by caller.
266cf053153SJunchao Zhang static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
267cf053153SJunchao Zhang {
268cf053153SJunchao Zhang   PetscFunctionBegin;
269cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
270cf053153SJunchao Zhang   const PetscPrecision mumps_precision = precision;
271cf053153SJunchao Zhang   PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precicison (%d). Must be single or double", (int)precision);
272cf053153SJunchao Zhang   #if defined(PETSC_USE_COMPLEX)
273cf053153SJunchao Zhang   if (mumps_precision != PETSC_SCALAR_PRECISION) {
274cf053153SJunchao Zhang     if (mumps_precision == PETSC_PRECISION_SINGLE) {
275cf053153SJunchao Zhang       if (*m < n) {
276cf053153SJunchao Zhang         PetscCall(PetscFree(*ma));
277cf053153SJunchao Zhang         PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
278cf053153SJunchao Zhang         *m = n;
279cf053153SJunchao Zhang       }
280cf053153SJunchao Zhang       if (convert) {
281cf053153SJunchao Zhang         CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
282cf053153SJunchao Zhang         for (PetscCount i = 0; i < n; i++) {
283cf053153SJunchao Zhang           b[i].r = PetscRealPart(pa[i]);
284cf053153SJunchao Zhang           b[i].i = PetscImaginaryPart(pa[i]);
2852893035cSJunchao Zhang         }
286cf053153SJunchao Zhang       }
287cf053153SJunchao Zhang     } else {
288cf053153SJunchao Zhang       if (*m < n) {
289cf053153SJunchao Zhang         PetscCall(PetscFree(*ma));
290cf053153SJunchao Zhang         PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
291cf053153SJunchao Zhang         *m = n;
292cf053153SJunchao Zhang       }
293cf053153SJunchao Zhang       if (convert) {
294cf053153SJunchao Zhang         ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
295cf053153SJunchao Zhang         for (PetscCount i = 0; i < n; i++) {
296cf053153SJunchao Zhang           b[i].r = PetscRealPart(pa[i]);
297cf053153SJunchao Zhang           b[i].i = PetscImaginaryPart(pa[i]);
298cf053153SJunchao Zhang         }
299cf053153SJunchao Zhang       }
300cf053153SJunchao Zhang     }
301cf053153SJunchao Zhang   }
302cf053153SJunchao Zhang   #else
303cf053153SJunchao Zhang   if (mumps_precision != PETSC_SCALAR_PRECISION) {
304cf053153SJunchao Zhang     if (mumps_precision == PETSC_PRECISION_SINGLE) {
305cf053153SJunchao Zhang       if (*m < n) {
306cf053153SJunchao Zhang         PetscCall(PetscFree(*ma));
307cf053153SJunchao Zhang         PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
308cf053153SJunchao Zhang         *m = n;
309cf053153SJunchao Zhang       }
310cf053153SJunchao Zhang       if (convert) {
311cf053153SJunchao Zhang         SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
312cf053153SJunchao Zhang         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
313cf053153SJunchao Zhang       }
314cf053153SJunchao Zhang     } else {
315cf053153SJunchao Zhang       if (*m < n) {
316cf053153SJunchao Zhang         PetscCall(PetscFree(*ma));
317cf053153SJunchao Zhang         PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
318cf053153SJunchao Zhang         *m = n;
319cf053153SJunchao Zhang       }
320cf053153SJunchao Zhang       if (convert) {
321cf053153SJunchao Zhang         DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
322cf053153SJunchao Zhang         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
323cf053153SJunchao Zhang       }
324cf053153SJunchao Zhang     }
325cf053153SJunchao Zhang   }
326cf053153SJunchao Zhang   #endif
327cf053153SJunchao Zhang   else
328cf053153SJunchao Zhang #endif
329cf053153SJunchao Zhang   {
330cf053153SJunchao Zhang     if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
331cf053153SJunchao Zhang     *ma = (void *)pa;                       // same precision, make them alias
332cf053153SJunchao Zhang     *m  = 0;
333cf053153SJunchao Zhang   }
334cf053153SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
335cf053153SJunchao Zhang }
336cf053153SJunchao Zhang 
337cf053153SJunchao Zhang // Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>.
338cf053153SJunchao Zhang //
339cf053153SJunchao Zhang // 1) If the two types are different, cast array elements.
340cf053153SJunchao Zhang // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
341cf053153SJunchao Zhang static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
342cf053153SJunchao Zhang {
343cf053153SJunchao Zhang   PetscFunctionBegin;
344cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
345cf053153SJunchao Zhang   if (mumps_precision != PETSC_SCALAR_PRECISION) {
346cf053153SJunchao Zhang   #if defined(PETSC_USE_COMPLEX)
347cf053153SJunchao Zhang     if (mumps_precision == PETSC_PRECISION_SINGLE) {
348cf053153SJunchao Zhang       PetscReal         *a = (PetscReal *)pa;
349cf053153SJunchao Zhang       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
350cf053153SJunchao Zhang       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
351cf053153SJunchao Zhang     } else {
352cf053153SJunchao Zhang       PetscReal         *a = (PetscReal *)pa;
353cf053153SJunchao Zhang       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
354cf053153SJunchao Zhang       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
355cf053153SJunchao Zhang     }
356cf053153SJunchao Zhang   #else
357cf053153SJunchao Zhang     if (mumps_precision == PETSC_PRECISION_SINGLE) {
358cf053153SJunchao Zhang       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
359cf053153SJunchao Zhang       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
360cf053153SJunchao Zhang     } else {
361cf053153SJunchao Zhang       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
362cf053153SJunchao Zhang       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
363cf053153SJunchao Zhang     }
364cf053153SJunchao Zhang   #endif
365cf053153SJunchao Zhang   } else
366cf053153SJunchao Zhang #endif
367cf053153SJunchao Zhang     PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
368cf053153SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
369cf053153SJunchao Zhang }
370cf053153SJunchao Zhang 
371cf053153SJunchao Zhang // Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
372cf053153SJunchao Zhang //
373cf053153SJunchao Zhang // 1) If the two types are different, cast array elements.
374cf053153SJunchao Zhang // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
375cf053153SJunchao Zhang static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
376cf053153SJunchao Zhang {
377cf053153SJunchao Zhang   PetscFunctionBegin;
378cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
379cf053153SJunchao Zhang   if (mumps_precision != PETSC_SCALAR_PRECISION) {
380cf053153SJunchao Zhang   #if defined(PETSC_USE_COMPLEX)
381cf053153SJunchao Zhang     if (mumps_precision == PETSC_PRECISION_SINGLE) {
382cf053153SJunchao Zhang       CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
383cf053153SJunchao Zhang       for (PetscCount i = 0; i < n; i++) {
384cf053153SJunchao Zhang         b[i].r = PetscRealPart(pa[i]);
385cf053153SJunchao Zhang         b[i].i = PetscImaginaryPart(pa[i]);
386cf053153SJunchao Zhang       }
387cf053153SJunchao Zhang     } else {
388cf053153SJunchao Zhang       ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
389cf053153SJunchao Zhang       for (PetscCount i = 0; i < n; i++) {
390cf053153SJunchao Zhang         b[i].r = PetscRealPart(pa[i]);
391cf053153SJunchao Zhang         b[i].i = PetscImaginaryPart(pa[i]);
392cf053153SJunchao Zhang       }
393cf053153SJunchao Zhang     }
394cf053153SJunchao Zhang   #else
395cf053153SJunchao Zhang     if (mumps_precision == PETSC_PRECISION_SINGLE) {
396cf053153SJunchao Zhang       SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
397cf053153SJunchao Zhang       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
398cf053153SJunchao Zhang     } else {
399cf053153SJunchao Zhang       DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
400cf053153SJunchao Zhang       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
401cf053153SJunchao Zhang     }
402cf053153SJunchao Zhang   #endif
403cf053153SJunchao Zhang   } else
404cf053153SJunchao Zhang #endif
405cf053153SJunchao Zhang     PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
406cf053153SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
407cf053153SJunchao Zhang }
408cf053153SJunchao Zhang 
409cf053153SJunchao Zhang static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
410cf053153SJunchao Zhang {
411cf053153SJunchao Zhang   return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
412cf053153SJunchao Zhang }
413cf053153SJunchao Zhang 
// Copy the per-call inputs from the abstract <outer> struct into the typed <inner> MUMPS struct before MUMPS is called.
// The void * scalar buffers of <outer> are cast to <mumpsscalar> *. Fixed-size arrays and the three init-only fields
// (comm_fortran, par, sym) are not touched here; they are set up once by AllocatInternalID().
#define PreMumpsCall(inner, outer, mumpsscalar) \
  do { \
    /* job and problem size */ \
    (inner)->job  = (outer)->job; \
    (inner)->n    = (outer)->n; \
    (inner)->nblk = (outer)->nblk; \
    /* assembled (centralized) entry */ \
    (inner)->nnz = (outer)->nnz; \
    (inner)->irn = (outer)->irn; \
    (inner)->jcn = (outer)->jcn; \
    (inner)->a   = (mumpsscalar *)(outer)->a; \
    /* distributed entry */ \
    (inner)->nnz_loc = (outer)->nnz_loc; \
    (inner)->irn_loc = (outer)->irn_loc; \
    (inner)->jcn_loc = (outer)->jcn_loc; \
    (inner)->a_loc   = (mumpsscalar *)(outer)->a_loc; \
    /* matrix by blocks and user ordering */ \
    (inner)->blkptr  = (outer)->blkptr; \
    (inner)->blkvar  = (outer)->blkvar; \
    (inner)->perm_in = (outer)->perm_in; \
    /* right-hand side / solution buffers */ \
    (inner)->rhs        = (mumpsscalar *)(outer)->rhs; \
    (inner)->redrhs     = (mumpsscalar *)(outer)->redrhs; \
    (inner)->rhs_sparse = (mumpsscalar *)(outer)->rhs_sparse; \
    (inner)->sol_loc    = (mumpsscalar *)(outer)->sol_loc; \
    (inner)->rhs_loc    = (mumpsscalar *)(outer)->rhs_loc; \
    /* right-hand side / solution index arrays */ \
    (inner)->irhs_sparse = (outer)->irhs_sparse; \
    (inner)->irhs_ptr    = (outer)->irhs_ptr; \
    (inner)->isol_loc    = (outer)->isol_loc; \
    (inner)->irhs_loc    = (outer)->irhs_loc; \
    /* right-hand side / solution sizes */ \
    (inner)->nrhs     = (outer)->nrhs; \
    (inner)->lrhs     = (outer)->lrhs; \
    (inner)->lredrhs  = (outer)->lredrhs; \
    (inner)->nz_rhs   = (outer)->nz_rhs; \
    (inner)->lsol_loc = (outer)->lsol_loc; \
    (inner)->nloc_rhs = (outer)->nloc_rhs; \
    (inner)->lrhs_loc = (outer)->lrhs_loc; \
    /* Schur complement */ \
    (inner)->schur_lld     = (outer)->schur_lld; \
    (inner)->size_schur    = (outer)->size_schur; \
    (inner)->listvar_schur = (outer)->listvar_schur; \
    (inner)->schur         = (mumpsscalar *)(outer)->schur; \
  } while (0)
451cf053153SJunchao Zhang 
// Copy back, after a MUMPS call, the pointers of the typed <inner> struct that are mirrored in <outer>:
// the mapping and pivnul_list arrays (the struct definition marks both as allocated by MUMPS).
#define PostMumpsCall(inner, outer) \
  do { \
    (outer)->mapping     = (inner)->mapping; \
    (outer)->pivnul_list = (inner)->pivnul_list; \
  } while (0)
457cf053153SJunchao Zhang 
458cf053153SJunchao Zhang // Entry for PETSc to call mumps
459cf053153SJunchao Zhang static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
460cf053153SJunchao Zhang {
461cf053153SJunchao Zhang   PetscFunctionBegin;
462cf053153SJunchao Zhang #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
463cf053153SJunchao Zhang   #if defined(PETSC_USE_COMPLEX)
464cf053153SJunchao Zhang   if (outer->precision == PETSC_PRECISION_SINGLE) {
465cf053153SJunchao Zhang     CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
466cf053153SJunchao Zhang     PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
467cf053153SJunchao Zhang     PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
468cf053153SJunchao Zhang     PostMumpsCall(inner, outer);
469cf053153SJunchao Zhang   } else {
470cf053153SJunchao Zhang     ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
471cf053153SJunchao Zhang     PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
472cf053153SJunchao Zhang     PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
473cf053153SJunchao Zhang     PostMumpsCall(inner, outer);
474cf053153SJunchao Zhang   }
475cf053153SJunchao Zhang   #else
476cf053153SJunchao Zhang   if (outer->precision == PETSC_PRECISION_SINGLE) {
477cf053153SJunchao Zhang     SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
478cf053153SJunchao Zhang     PreMumpsCall(inner, outer, SMUMPS_REAL);
479cf053153SJunchao Zhang     PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
480cf053153SJunchao Zhang     PostMumpsCall(inner, outer);
481cf053153SJunchao Zhang   } else {
482cf053153SJunchao Zhang     DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
483cf053153SJunchao Zhang     PreMumpsCall(inner, outer, DMUMPS_REAL);
484cf053153SJunchao Zhang     PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
485cf053153SJunchao Zhang     PostMumpsCall(inner, outer);
486cf053153SJunchao Zhang   }
487cf053153SJunchao Zhang   #endif
488cf053153SJunchao Zhang #else
489cf053153SJunchao Zhang   MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
490cf053153SJunchao Zhang   PreMumpsCall(inner, outer, MumpsScalar);
491cf053153SJunchao Zhang   PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
492cf053153SJunchao Zhang   PostMumpsCall(inner, outer);
493cf053153SJunchao Zhang #endif
494cf053153SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
495cf053153SJunchao Zhang }
496cf053153SJunchao Zhang 
/* Accessors that let the C code use the same 1-based indices as the MUMPS documentation,
   e.g., id.ICNTL(7) refers to id.icntl[6] */
#define PETSC_MUMPS_CIDX(I) ((I) - 1) /* 1-based MUMPS index -> 0-based C index */
#define ICNTL(I)            icntl[PETSC_MUMPS_CIDX(I)]
#define INFOG(I)            infog[PETSC_MUMPS_CIDX(I)]
#define INFO(I)             info[PETSC_MUMPS_CIDX(I)]
501cf053153SJunchao Zhang 
// Read entry I of the MumpsScalar array stored in field <F> of the struct <ID>, interpreting the storage according
// to ID.precision. The result is convertible to PetscScalar. Unlike ICNTL() and friends, I is used as is (0-based).
#if !defined(PETSC_USE_COMPLEX)
  #define ID_FIELD_GET(ID, F, I) ((ID).precision != PETSC_PRECISION_SINGLE ? ((double *)(ID).F)[I] : ((float *)(ID).F)[I])
#else
  #define ID_FIELD_GET(ID, F, I) \
    ((ID).precision != PETSC_PRECISION_SINGLE ? ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i \
                                              : ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i)
#endif
508cf053153SJunchao Zhang 
// Read the I-th (1-based, as in the MUMPS manual) entry of one of the MumpsReal arrays of <ID>. The array is
// viewed as float or double depending on ID.precision; the result is convertible to PetscReal.
#define ID_REAL_ARRAY_GET_Private(ID, ARR, I) ((ID).precision != PETSC_PRECISION_SINGLE ? ((double *)(ID).ARR)[(I) - 1] : ((float *)(ID).ARR)[(I) - 1])
#define ID_CNTL_GET(ID, I)                    ID_REAL_ARRAY_GET_Private(ID, cntl, I)
#define ID_RINFOG_GET(ID, I)                  ID_REAL_ARRAY_GET_Private(ID, rinfog, I)
#define ID_RINFO_GET(ID, I)                   ID_REAL_ARRAY_GET_Private(ID, rinfo, I)
513cf053153SJunchao Zhang 
// Store the PetscReal <VAL> in the I-th (1-based) entry of the MumpsReal array ID.cntl[], writing it as a
// float or a double depending on ID.precision
#define ID_CNTL_SET(ID, I, VAL) \
  do { \
    if ((ID).precision != PETSC_PRECISION_SINGLE) { \
      ((double *)(ID).cntl)[(I) - 1] = (VAL); \
    } else { \
      ((float *)(ID).cntl)[(I) - 1] = (VAL); \
    } \
  } while (0)
520cf053153SJunchao Zhang 
/* Call MUMPS with floating-point trapping turned off for the duration of the call */
#define PetscMUMPS_cFPTrapOff_Private(mumps) \
  do { \
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
    PetscCall(PetscCallMumps_Private(&mumps->id)); \
    PetscCall(PetscFPTrapPop()); \
  } while (0)

/* PetscMUMPS_c(mumps) - run MUMPS on the data held in mumps->id.

   With PETSc OpenMP support in use, MUMPS is only called on the master ranks; the call is bracketed by
   PetscOmpCtrlOmpRegionOnMasterBegin()/End(), which change/restore the CPUs the master ranks can run on.
   After a barrier on omp_ctrl, the infog[], info[], rinfog[] and rinfo[] arrays are broadcast from rank 0 of
   omp_comm so that every rank sees the outcome of the call. Global info is the same on all processes, while
   local info is process specific and has no easy translation between omp_comm and petsc_comm (see the
   MUMPS-5.1.2 manual p82). omp_comm is a small shared memory communicator, hence doing several MPI_Bcast()
   is OK.
*/
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (!mumps->use_petsc_omp_support) { \
        PetscMUMPS_cFPTrapOff_Private(mumps); \
      } else { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscMUMPS_cFPTrapOff_Private(mumps); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        SMUMPS_STRUC_C mumps_info_layout_; /* only used for array lengths; all MUMPS_STRUC_C types have same lengths on these info arrays */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps_info_layout_.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps_info_layout_.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps_info_layout_.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps_info_layout_.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) PetscMUMPS_cFPTrapOff_Private(mumps)
#endif
5583ab56b82SJunchao Zhang 
559a6053eceSJunchao Zhang typedef struct Mat_MUMPS Mat_MUMPS;
560a6053eceSJunchao Zhang struct Mat_MUMPS {
561cf053153SJunchao Zhang   XMUMPS_STRUC_C id;
5622907cef9SHong Zhang 
563397b6df1SKris Buschelman   MatStructure   matstruc;
5642d4298aeSJunchao Zhang   PetscMPIInt    myid, petsc_size;
565a6053eceSJunchao Zhang   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
566a6053eceSJunchao Zhang   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
5676497c311SBarry Smith   PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
568a6053eceSJunchao Zhang   PetscMUMPSInt  sym;
5692d4298aeSJunchao Zhang   MPI_Comm       mumps_comm;
570413bcc21SPierre Jolivet   PetscMUMPSInt *ICNTL_pre;
571413bcc21SPierre Jolivet   PetscReal     *CNTL_pre;
572a6053eceSJunchao Zhang   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
573801fbe65SHong Zhang   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
57425aac85cSJunchao Zhang   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
575cf053153SJunchao Zhang   PetscMUMPSInt  ICNTL26;
57667602552SJunchao Zhang   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
57767602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT)
57867602552SJunchao Zhang   PetscInt    *rhs_nrow, max_nrhs;
57967602552SJunchao Zhang   PetscMPIInt *rhs_recvcounts, *rhs_disps;
58067602552SJunchao Zhang   PetscScalar *rhs_loc, *rhs_recvbuf;
58167602552SJunchao Zhang #endif
582801fbe65SHong Zhang   Vec            b_seq, x_seq;
583a6053eceSJunchao Zhang   PetscInt       ninfo, *info; /* which INFO to display */
584b5fa320bSStefano Zampini   PetscInt       sizeredrhs;
58559ac8732SStefano Zampini   PetscScalar   *schur_sol;
58659ac8732SStefano Zampini   PetscInt       schur_sizesol;
587cf053153SJunchao Zhang   PetscScalar   *redrhs;              // buffer in PetscScalar in case MumpsScalar is in a different precision
588a6053eceSJunchao Zhang   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
5896497c311SBarry Smith   PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
590a6053eceSJunchao Zhang   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
5912205254eSKarl Rupp 
5929d0448ceSStefano Zampini   /* Support for MATNEST */
5939d0448ceSStefano Zampini   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
5946497c311SBarry Smith   PetscCount  *nest_vals_start;
5959d0448ceSStefano Zampini   PetscScalar *nest_vals;
5969d0448ceSStefano Zampini 
597a6053eceSJunchao Zhang   /* stuff used by petsc/mumps OpenMP support*/
5983ab56b82SJunchao Zhang   PetscBool    use_petsc_omp_support;
599da81f932SPierre Jolivet   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
600f0b74427SPierre Jolivet   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
6016497c311SBarry Smith   PetscCount  *recvcount;            /* a collection of nnz on omp_master */
602a6053eceSJunchao Zhang   PetscMPIInt  tag, omp_comm_size;
6033ab56b82SJunchao Zhang   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
604a6053eceSJunchao Zhang   MPI_Request *reqs;
605a6053eceSJunchao Zhang };
6063ab56b82SJunchao Zhang 
607a6053eceSJunchao Zhang /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
608a6053eceSJunchao Zhang    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
609a6053eceSJunchao Zhang  */
610d2a308c1SPierre Jolivet static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
611d71ae5a4SJacob Faibussowitsch {
6126497c311SBarry Smith   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */
613f0c56d0fSKris Buschelman 
614a6053eceSJunchao Zhang   PetscFunctionBegin;
615a6053eceSJunchao Zhang #if defined(PETSC_USE_64BIT_INDICES)
616a6053eceSJunchao Zhang   {
617a6053eceSJunchao Zhang     PetscInt i;
618a6053eceSJunchao Zhang     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
6199566063dSJacob Faibussowitsch       PetscCall(PetscFree(mumps->ia_alloc));
6209566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
621a6053eceSJunchao Zhang       mumps->cur_ilen = nrow + 1;
622a6053eceSJunchao Zhang     }
623a6053eceSJunchao Zhang     if (nnz > mumps->cur_jlen) {
6249566063dSJacob Faibussowitsch       PetscCall(PetscFree(mumps->ja_alloc));
6259566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
626a6053eceSJunchao Zhang       mumps->cur_jlen = nnz;
627a6053eceSJunchao Zhang     }
628f4f49eeaSPierre Jolivet     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
629f4f49eeaSPierre Jolivet     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
630a6053eceSJunchao Zhang     *ia_mumps = mumps->ia_alloc;
631a6053eceSJunchao Zhang     *ja_mumps = mumps->ja_alloc;
632a6053eceSJunchao Zhang   }
633a6053eceSJunchao Zhang #else
634a6053eceSJunchao Zhang   *ia_mumps = ia;
635a6053eceSJunchao Zhang   *ja_mumps = ja;
636a6053eceSJunchao Zhang #endif
6379566063dSJacob Faibussowitsch   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
6383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
639a6053eceSJunchao Zhang }
640b24902e0SBarry Smith 
641d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
642d71ae5a4SJacob Faibussowitsch {
643b5fa320bSStefano Zampini   PetscFunctionBegin;
6449566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->id.listvar_schur));
645cf053153SJunchao Zhang   PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
6469566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->schur_sol));
64759ac8732SStefano Zampini   mumps->id.size_schur = 0;
648b3cb21ddSStefano Zampini   mumps->id.schur_lld  = 0;
649cf053153SJunchao Zhang   if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is yet built
6503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
65159ac8732SStefano Zampini }
65259ac8732SStefano Zampini 
653b3cb21ddSStefano Zampini /* solve with rhs in mumps->id.redrhs and return in the same location */
654d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
655d71ae5a4SJacob Faibussowitsch {
656b3cb21ddSStefano Zampini   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
657cf053153SJunchao Zhang   Mat                  S, B, X; // solve S*X = B; all three matrices are dense
658b3cb21ddSStefano Zampini   MatFactorSchurStatus schurstatus;
659b3cb21ddSStefano Zampini   PetscInt             sizesol;
660cf053153SJunchao Zhang   const PetscScalar   *xarray;
66159ac8732SStefano Zampini 
66259ac8732SStefano Zampini   PetscFunctionBegin;
6639566063dSJacob Faibussowitsch   PetscCall(MatFactorFactorizeSchurComplement(F));
6649566063dSJacob Faibussowitsch   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
665cf053153SJunchao Zhang   PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));
666cf053153SJunchao Zhang 
667cf053153SJunchao Zhang   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
6689566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
669a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6709566063dSJacob Faibussowitsch   PetscCall(MatBindToCPU(B, S->boundtocpu));
671a3d589ffSStefano Zampini #endif
672b3cb21ddSStefano Zampini   switch (schurstatus) {
673d71ae5a4SJacob Faibussowitsch   case MAT_FACTOR_SCHUR_FACTORED:
674cf053153SJunchao Zhang     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
675d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
676a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6779566063dSJacob Faibussowitsch     PetscCall(MatBindToCPU(X, S->boundtocpu));
678a3d589ffSStefano Zampini #endif
679b3cb21ddSStefano Zampini     if (!mumps->id.ICNTL(9)) { /* transpose solve */
6809566063dSJacob Faibussowitsch       PetscCall(MatMatSolveTranspose(S, B, X));
68159ac8732SStefano Zampini     } else {
6829566063dSJacob Faibussowitsch       PetscCall(MatMatSolve(S, B, X));
68359ac8732SStefano Zampini     }
684b3cb21ddSStefano Zampini     break;
685b3cb21ddSStefano Zampini   case MAT_FACTOR_SCHUR_INVERTED:
686b3cb21ddSStefano Zampini     sizesol = mumps->id.nrhs * mumps->id.size_schur;
68759ac8732SStefano Zampini     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
6889566063dSJacob Faibussowitsch       PetscCall(PetscFree(mumps->schur_sol));
6899566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
69059ac8732SStefano Zampini       mumps->schur_sizesol = sizesol;
691b5fa320bSStefano Zampini     }
6929566063dSJacob Faibussowitsch     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
6939566063dSJacob Faibussowitsch     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
694a3d589ffSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6959566063dSJacob Faibussowitsch     PetscCall(MatBindToCPU(X, S->boundtocpu));
696a3d589ffSStefano Zampini #endif
6979566063dSJacob Faibussowitsch     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
69859ac8732SStefano Zampini     if (!mumps->id.ICNTL(9)) { /* transpose solve */
6999566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
700b5fa320bSStefano Zampini     } else {
7019566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
702b5fa320bSStefano Zampini     }
7039566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions(X));
7049566063dSJacob Faibussowitsch     PetscCall(MatProductSymbolic(X));
7059566063dSJacob Faibussowitsch     PetscCall(MatProductNumeric(X));
7064417c5e8SHong Zhang 
7079566063dSJacob Faibussowitsch     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
708b3cb21ddSStefano Zampini     break;
709d71ae5a4SJacob Faibussowitsch   default:
710d71ae5a4SJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
71159ac8732SStefano Zampini   }
712cf053153SJunchao Zhang   // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
713cf053153SJunchao Zhang   PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
714cf053153SJunchao Zhang   PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
715cf053153SJunchao Zhang   PetscCall(MatDenseRestoreArrayRead(X, &xarray));
7169566063dSJacob Faibussowitsch   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
7179566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
7189566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&X));
7193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
720b5fa320bSStefano Zampini }
721b5fa320bSStefano Zampini 
722d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
723d71ae5a4SJacob Faibussowitsch {
724b3cb21ddSStefano Zampini   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
725b5fa320bSStefano Zampini 
726b5fa320bSStefano Zampini   PetscFunctionBegin;
727b5fa320bSStefano Zampini   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
7283ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
729b5fa320bSStefano Zampini   }
730b8f61ee1SStefano Zampini   if (!expansion) { /* prepare for the condensation step */
731b5fa320bSStefano Zampini     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
732b5fa320bSStefano Zampini     /* allocate MUMPS internal array to store reduced right-hand sides */
733b5fa320bSStefano Zampini     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
734b5fa320bSStefano Zampini       mumps->id.lredrhs = mumps->id.size_schur;
735b5fa320bSStefano Zampini       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
736cf053153SJunchao Zhang       if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
737cf053153SJunchao Zhang       PetscCall(PetscFree(mumps->redrhs));
738cf053153SJunchao Zhang       PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
739cf053153SJunchao Zhang       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
740b5fa320bSStefano Zampini     }
741b5fa320bSStefano Zampini   } else {                                    /* prepare for the expansion step */
742cf053153SJunchao Zhang     PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
743b5fa320bSStefano Zampini     mumps->id.ICNTL(26) = 2;                  /* expansion phase */
7443ab56b82SJunchao Zhang     PetscMUMPS_c(mumps);
745cf053153SJunchao Zhang     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
746b5fa320bSStefano Zampini     /* restore defaults */
747b5fa320bSStefano Zampini     mumps->id.ICNTL(26) = -1;
748d3d598ffSStefano Zampini     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
749d3d598ffSStefano Zampini     if (mumps->id.nrhs > 1) {
750cf053153SJunchao Zhang       if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
751cf053153SJunchao Zhang       PetscCall(PetscFree(mumps->redrhs));
752cf053153SJunchao Zhang       mumps->id.redrhs_len = 0;
753d3d598ffSStefano Zampini       mumps->id.lredrhs    = 0;
754d3d598ffSStefano Zampini       mumps->sizeredrhs    = 0;
755d3d598ffSStefano Zampini     }
756b5fa320bSStefano Zampini   }
7573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
758b5fa320bSStefano Zampini }
759b5fa320bSStefano Zampini 
760397b6df1SKris Buschelman /*
761f0b74427SPierre Jolivet   MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]
762d341cd04SHong Zhang 
763397b6df1SKris Buschelman   input:
76475480915SPierre Jolivet     A       - matrix in aij,baij or sbaij format
765397b6df1SKris Buschelman     shift   - 0: C style output triple; 1: Fortran style output triple.
766bccb9932SShri Abhyankar     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
767bccb9932SShri Abhyankar               MAT_REUSE_MATRIX:   only the values in v array are updated
768397b6df1SKris Buschelman   output:
769397b6df1SKris Buschelman     nnz     - dim of r, c, and v (number of local nonzero entries of A)
770397b6df1SKris Buschelman     r, c, v - row and col index, matrix values (matrix triples)
771eb9baa12SBarry Smith 
772eb9baa12SBarry Smith   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
7737ee00b23SStefano Zampini   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
774eb9baa12SBarry Smith   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
775eb9baa12SBarry Smith 
776397b6df1SKris Buschelman  */
77716ebf90aSShri Abhyankar 
77866976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
779d71ae5a4SJacob Faibussowitsch {
780a3d589ffSStefano Zampini   const PetscScalar *av;
781185f6596SHong Zhang   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
7826497c311SBarry Smith   PetscCount         nz, rnz, k;
783a6053eceSJunchao Zhang   PetscMUMPSInt     *row, *col;
78416ebf90aSShri Abhyankar   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
785397b6df1SKris Buschelman 
786397b6df1SKris Buschelman   PetscFunctionBegin;
7879566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(A, &av));
788bccb9932SShri Abhyankar   if (reuse == MAT_INITIAL_MATRIX) {
7892205254eSKarl Rupp     nz = aa->nz;
7902205254eSKarl Rupp     ai = aa->i;
7912205254eSKarl Rupp     aj = aa->j;
7929566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
7936497c311SBarry Smith     for (PetscCount i = k = 0; i < M; i++) {
79416ebf90aSShri Abhyankar       rnz = ai[i + 1] - ai[i];
79567877ebaSShri Abhyankar       ajj = aj + ai[i];
7966497c311SBarry Smith       for (PetscCount j = 0; j < rnz; j++) {
7979566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
7989566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
799a6053eceSJunchao Zhang         k++;
80016ebf90aSShri Abhyankar       }
80116ebf90aSShri Abhyankar     }
80250c845baSStefano Zampini     mumps->val = (PetscScalar *)av;
803a6053eceSJunchao Zhang     mumps->irn = row;
804a6053eceSJunchao Zhang     mumps->jcn = col;
805a6053eceSJunchao Zhang     mumps->nnz = nz;
806127cd276SPierre Jolivet   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
807127cd276SPierre Jolivet   else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
8089566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
8093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
81016ebf90aSShri Abhyankar }
811397b6df1SKris Buschelman 
81266976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
813d71ae5a4SJacob Faibussowitsch {
8146497c311SBarry Smith   PetscCount     nz, i, j, k, r;
8157ee00b23SStefano Zampini   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
816a6053eceSJunchao Zhang   PetscMUMPSInt *row, *col;
8177ee00b23SStefano Zampini 
8187ee00b23SStefano Zampini   PetscFunctionBegin;
8197ee00b23SStefano Zampini   nz = a->sliidx[a->totalslices];
82050c845baSStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
8219566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
822a6053eceSJunchao Zhang     for (i = k = 0; i < a->totalslices; i++) {
82348a46eb9SPierre Jolivet       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
8247ee00b23SStefano Zampini     }
8259566063dSJacob Faibussowitsch     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
826a6053eceSJunchao Zhang     mumps->irn = row;
827a6053eceSJunchao Zhang     mumps->jcn = col;
828a6053eceSJunchao Zhang     mumps->nnz = nz;
82950c845baSStefano Zampini     mumps->val = a->val;
830127cd276SPierre Jolivet   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
831127cd276SPierre Jolivet   else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
8323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
8337ee00b23SStefano Zampini }
8347ee00b23SStefano Zampini 
83566976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
836d71ae5a4SJacob Faibussowitsch {
83767877ebaSShri Abhyankar   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
83833d57670SJed Brown   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
8396497c311SBarry Smith   PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
840a6053eceSJunchao Zhang   PetscInt        bs;
841a6053eceSJunchao Zhang   PetscMUMPSInt  *row, *col;
84267877ebaSShri Abhyankar 
84367877ebaSShri Abhyankar   PetscFunctionBegin;
84450c845baSStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
8459566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSize(A, &bs));
84633d57670SJed Brown     M  = A->rmap->N / bs;
8479371c9d4SSatish Balay     ai = aa->i;
8489371c9d4SSatish Balay     aj = aa->j;
8499566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
85067877ebaSShri Abhyankar     for (i = 0; i < M; i++) {
85167877ebaSShri Abhyankar       ajj = aj + ai[i];
85267877ebaSShri Abhyankar       rnz = ai[i + 1] - ai[i];
85367877ebaSShri Abhyankar       for (k = 0; k < rnz; k++) {
85467877ebaSShri Abhyankar         for (j = 0; j < bs; j++) {
85567877ebaSShri Abhyankar           for (m = 0; m < bs; m++) {
8569566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
8579566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
858a6053eceSJunchao Zhang             idx++;
85967877ebaSShri Abhyankar           }
86067877ebaSShri Abhyankar         }
86167877ebaSShri Abhyankar       }
86267877ebaSShri Abhyankar     }
863a6053eceSJunchao Zhang     mumps->irn = row;
864a6053eceSJunchao Zhang     mumps->jcn = col;
865a6053eceSJunchao Zhang     mumps->nnz = nz;
86650c845baSStefano Zampini     mumps->val = aa->a;
867127cd276SPierre Jolivet   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
868127cd276SPierre Jolivet   else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
8693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
87067877ebaSShri Abhyankar }
87167877ebaSShri Abhyankar 
87266976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
873d71ae5a4SJacob Faibussowitsch {
87475480915SPierre Jolivet   const PetscInt *ai, *aj, *ajj;
875a6053eceSJunchao Zhang   PetscInt        bs;
8766497c311SBarry Smith   PetscCount      nz, rnz, i, j, k, m;
877a6053eceSJunchao Zhang   PetscMUMPSInt  *row, *col;
87875480915SPierre Jolivet   PetscScalar    *val;
87916ebf90aSShri Abhyankar   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
88075480915SPierre Jolivet   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
88138548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
882b94d7dedSBarry Smith   PetscBool isset, hermitian;
88338548759SBarry Smith #endif
88416ebf90aSShri Abhyankar 
88516ebf90aSShri Abhyankar   PetscFunctionBegin;
88638548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
887b94d7dedSBarry Smith   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
888b94d7dedSBarry Smith   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
88938548759SBarry Smith #endif
8902205254eSKarl Rupp   ai = aa->i;
8912205254eSKarl Rupp   aj = aa->j;
8929566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(A, &bs));
89375480915SPierre Jolivet   if (reuse == MAT_INITIAL_MATRIX) {
8946497c311SBarry Smith     const PetscCount alloc_size = aa->nz * bs2;
895f3fa974cSJacob Faibussowitsch 
896f3fa974cSJacob Faibussowitsch     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
897a6053eceSJunchao Zhang     if (bs > 1) {
898f3fa974cSJacob Faibussowitsch       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
899a6053eceSJunchao Zhang       mumps->val = mumps->val_alloc;
90075480915SPierre Jolivet     } else {
901a6053eceSJunchao Zhang       mumps->val = aa->a;
90275480915SPierre Jolivet     }
903a6053eceSJunchao Zhang     mumps->irn = row;
904a6053eceSJunchao Zhang     mumps->jcn = col;
905a6053eceSJunchao Zhang   } else {
906a6053eceSJunchao Zhang     row = mumps->irn;
907a6053eceSJunchao Zhang     col = mumps->jcn;
908a6053eceSJunchao Zhang   }
909a6053eceSJunchao Zhang   val = mumps->val;
910185f6596SHong Zhang 
91116ebf90aSShri Abhyankar   nz = 0;
912a81fe166SPierre Jolivet   if (bs > 1) {
91375480915SPierre Jolivet     for (i = 0; i < mbs; i++) {
91416ebf90aSShri Abhyankar       rnz = ai[i + 1] - ai[i];
91567877ebaSShri Abhyankar       ajj = aj + ai[i];
91675480915SPierre Jolivet       for (j = 0; j < rnz; j++) {
91775480915SPierre Jolivet         for (k = 0; k < bs; k++) {
91875480915SPierre Jolivet           for (m = 0; m < bs; m++) {
919ec4f40fdSPierre Jolivet             if (ajj[j] > i || k >= m) {
92075480915SPierre Jolivet               if (reuse == MAT_INITIAL_MATRIX) {
9219566063dSJacob Faibussowitsch                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
9229566063dSJacob Faibussowitsch                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
92375480915SPierre Jolivet               }
92475480915SPierre Jolivet               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
92575480915SPierre Jolivet             }
92675480915SPierre Jolivet           }
92775480915SPierre Jolivet         }
92875480915SPierre Jolivet       }
92975480915SPierre Jolivet     }
930a81fe166SPierre Jolivet   } else if (reuse == MAT_INITIAL_MATRIX) {
931a81fe166SPierre Jolivet     for (i = 0; i < mbs; i++) {
932a81fe166SPierre Jolivet       rnz = ai[i + 1] - ai[i];
933a81fe166SPierre Jolivet       ajj = aj + ai[i];
934a81fe166SPierre Jolivet       for (j = 0; j < rnz; j++) {
9359566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
9369566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
937a6053eceSJunchao Zhang         nz++;
938a81fe166SPierre Jolivet       }
939a81fe166SPierre Jolivet     }
9406497c311SBarry Smith     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
941127cd276SPierre Jolivet   } else if (mumps->nest_vals)
942127cd276SPierre Jolivet     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
943127cd276SPierre Jolivet   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
944a6053eceSJunchao Zhang   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
9453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
94616ebf90aSShri Abhyankar }
94716ebf90aSShri Abhyankar 
94866976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
949d71ae5a4SJacob Faibussowitsch {
95067877ebaSShri Abhyankar   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
9516497c311SBarry Smith   PetscCount         nz, rnz, i, j;
95267877ebaSShri Abhyankar   const PetscScalar *av, *v1;
95316ebf90aSShri Abhyankar   PetscScalar       *val;
954a6053eceSJunchao Zhang   PetscMUMPSInt     *row, *col;
955829b1710SHong Zhang   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
956421480d9SBarry Smith   PetscBool          diagDense;
95738548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
958b94d7dedSBarry Smith   PetscBool hermitian, isset;
95938548759SBarry Smith #endif
96016ebf90aSShri Abhyankar 
96116ebf90aSShri Abhyankar   PetscFunctionBegin;
96238548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
963b94d7dedSBarry Smith   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
964b94d7dedSBarry Smith   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
96538548759SBarry Smith #endif
9669566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(A, &av));
9679371c9d4SSatish Balay   ai = aa->i;
9689371c9d4SSatish Balay   aj = aa->j;
969421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
970bccb9932SShri Abhyankar   if (reuse == MAT_INITIAL_MATRIX) {
9717ee00b23SStefano Zampini     /* count nz in the upper triangular part of A */
972829b1710SHong Zhang     nz = 0;
973421480d9SBarry Smith     if (!diagDense) {
97429b521d4Sstefano_zampini       for (i = 0; i < M; i++) {
97529b521d4Sstefano_zampini         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
97629b521d4Sstefano_zampini           for (j = ai[i]; j < ai[i + 1]; j++) {
97729b521d4Sstefano_zampini             if (aj[j] < i) continue;
97829b521d4Sstefano_zampini             nz++;
97929b521d4Sstefano_zampini           }
98029b521d4Sstefano_zampini         } else {
98129b521d4Sstefano_zampini           nz += ai[i + 1] - adiag[i];
98229b521d4Sstefano_zampini         }
98329b521d4Sstefano_zampini       }
98429b521d4Sstefano_zampini     } else {
985829b1710SHong Zhang       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
98629b521d4Sstefano_zampini     }
9879566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
9889566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &val));
989a6053eceSJunchao Zhang     mumps->nnz = nz;
990a6053eceSJunchao Zhang     mumps->irn = row;
991a6053eceSJunchao Zhang     mumps->jcn = col;
992a6053eceSJunchao Zhang     mumps->val = mumps->val_alloc = val;
993185f6596SHong Zhang 
99416ebf90aSShri Abhyankar     nz = 0;
995421480d9SBarry Smith     if (!diagDense) {
99629b521d4Sstefano_zampini       for (i = 0; i < M; i++) {
99729b521d4Sstefano_zampini         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
99829b521d4Sstefano_zampini           for (j = ai[i]; j < ai[i + 1]; j++) {
99929b521d4Sstefano_zampini             if (aj[j] < i) continue;
10009566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
10019566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
100229b521d4Sstefano_zampini             val[nz] = av[j];
100329b521d4Sstefano_zampini             nz++;
100429b521d4Sstefano_zampini           }
100529b521d4Sstefano_zampini         } else {
100629b521d4Sstefano_zampini           rnz = ai[i + 1] - adiag[i];
100729b521d4Sstefano_zampini           ajj = aj + adiag[i];
100829b521d4Sstefano_zampini           v1  = av + adiag[i];
100929b521d4Sstefano_zampini           for (j = 0; j < rnz; j++) {
10109566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
10119566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1012a6053eceSJunchao Zhang             val[nz++] = v1[j];
101329b521d4Sstefano_zampini           }
101429b521d4Sstefano_zampini         }
101529b521d4Sstefano_zampini       }
101629b521d4Sstefano_zampini     } else {
101716ebf90aSShri Abhyankar       for (i = 0; i < M; i++) {
101816ebf90aSShri Abhyankar         rnz = ai[i + 1] - adiag[i];
101967877ebaSShri Abhyankar         ajj = aj + adiag[i];
1020cf3759fdSShri Abhyankar         v1  = av + adiag[i];
102167877ebaSShri Abhyankar         for (j = 0; j < rnz; j++) {
10229566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
10239566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1024a6053eceSJunchao Zhang           val[nz++] = v1[j];
102516ebf90aSShri Abhyankar         }
102616ebf90aSShri Abhyankar       }
102729b521d4Sstefano_zampini     }
1028397b6df1SKris Buschelman   } else {
1029a6053eceSJunchao Zhang     nz  = 0;
1030a6053eceSJunchao Zhang     val = mumps->val;
1031421480d9SBarry Smith     if (!diagDense) {
103216ebf90aSShri Abhyankar       for (i = 0; i < M; i++) {
103329b521d4Sstefano_zampini         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
103429b521d4Sstefano_zampini           for (j = ai[i]; j < ai[i + 1]; j++) {
103529b521d4Sstefano_zampini             if (aj[j] < i) continue;
103629b521d4Sstefano_zampini             val[nz++] = av[j];
103729b521d4Sstefano_zampini           }
103829b521d4Sstefano_zampini         } else {
103916ebf90aSShri Abhyankar           rnz = ai[i + 1] - adiag[i];
104067877ebaSShri Abhyankar           v1  = av + adiag[i];
1041ad540459SPierre Jolivet           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
104216ebf90aSShri Abhyankar         }
104316ebf90aSShri Abhyankar       }
104429b521d4Sstefano_zampini     } else {
104516ebf90aSShri Abhyankar       for (i = 0; i < M; i++) {
104616ebf90aSShri Abhyankar         rnz = ai[i + 1] - adiag[i];
104716ebf90aSShri Abhyankar         v1  = av + adiag[i];
1048ad540459SPierre Jolivet         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
104916ebf90aSShri Abhyankar       }
105016ebf90aSShri Abhyankar     }
105129b521d4Sstefano_zampini   }
10529566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
10533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
105416ebf90aSShri Abhyankar }
105516ebf90aSShri Abhyankar 
105666976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1057d71ae5a4SJacob Faibussowitsch {
1058a6053eceSJunchao Zhang   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
1059a6053eceSJunchao Zhang   PetscInt           bs;
10606497c311SBarry Smith   PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
1061a6053eceSJunchao Zhang   PetscMUMPSInt     *row, *col;
106216ebf90aSShri Abhyankar   const PetscScalar *av, *bv, *v1, *v2;
106316ebf90aSShri Abhyankar   PetscScalar       *val;
1064397b6df1SKris Buschelman   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
1065f4f49eeaSPierre Jolivet   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
1066f4f49eeaSPierre Jolivet   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1067ec4f40fdSPierre Jolivet   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
106838548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
1069b94d7dedSBarry Smith   PetscBool hermitian, isset;
107038548759SBarry Smith #endif
107116ebf90aSShri Abhyankar 
107216ebf90aSShri Abhyankar   PetscFunctionBegin;
107338548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
1074b94d7dedSBarry Smith   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1075b94d7dedSBarry Smith   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
107638548759SBarry Smith #endif
10779566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(A, &bs));
107838548759SBarry Smith   rstart = A->rmap->rstart;
107938548759SBarry Smith   ai     = aa->i;
108038548759SBarry Smith   aj     = aa->j;
108138548759SBarry Smith   bi     = bb->i;
108238548759SBarry Smith   bj     = bb->j;
108338548759SBarry Smith   av     = aa->a;
108438548759SBarry Smith   bv     = bb->a;
1085397b6df1SKris Buschelman 
10862205254eSKarl Rupp   garray = mat->garray;
10872205254eSKarl Rupp 
1088bccb9932SShri Abhyankar   if (reuse == MAT_INITIAL_MATRIX) {
1089a6053eceSJunchao Zhang     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
10909566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
10919566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &val));
1092a6053eceSJunchao Zhang     /* can not decide the exact mumps->nnz now because of the SBAIJ */
1093a6053eceSJunchao Zhang     mumps->irn = row;
1094a6053eceSJunchao Zhang     mumps->jcn = col;
1095a6053eceSJunchao Zhang     mumps->val = mumps->val_alloc = val;
1096397b6df1SKris Buschelman   } else {
1097a6053eceSJunchao Zhang     val = mumps->val;
1098397b6df1SKris Buschelman   }
1099397b6df1SKris Buschelman 
11009371c9d4SSatish Balay   jj   = 0;
11019371c9d4SSatish Balay   irow = rstart;
1102ec4f40fdSPierre Jolivet   for (i = 0; i < mbs; i++) {
1103397b6df1SKris Buschelman     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1104397b6df1SKris Buschelman     countA = ai[i + 1] - ai[i];
1105397b6df1SKris Buschelman     countB = bi[i + 1] - bi[i];
1106397b6df1SKris Buschelman     bjj    = bj + bi[i];
1107ec4f40fdSPierre Jolivet     v1     = av + ai[i] * bs2;
1108ec4f40fdSPierre Jolivet     v2     = bv + bi[i] * bs2;
1109397b6df1SKris Buschelman 
1110ec4f40fdSPierre Jolivet     if (bs > 1) {
1111ec4f40fdSPierre Jolivet       /* A-part */
1112ec4f40fdSPierre Jolivet       for (j = 0; j < countA; j++) {
1113ec4f40fdSPierre Jolivet         for (k = 0; k < bs; k++) {
1114ec4f40fdSPierre Jolivet           for (m = 0; m < bs; m++) {
1115ec4f40fdSPierre Jolivet             if (rstart + ajj[j] * bs > irow || k >= m) {
1116ec4f40fdSPierre Jolivet               if (reuse == MAT_INITIAL_MATRIX) {
11179566063dSJacob Faibussowitsch                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
11189566063dSJacob Faibussowitsch                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
1119ec4f40fdSPierre Jolivet               }
1120ec4f40fdSPierre Jolivet               val[jj++] = v1[j * bs2 + m + k * bs];
1121ec4f40fdSPierre Jolivet             }
1122ec4f40fdSPierre Jolivet           }
1123ec4f40fdSPierre Jolivet         }
1124ec4f40fdSPierre Jolivet       }
1125ec4f40fdSPierre Jolivet 
1126ec4f40fdSPierre Jolivet       /* B-part */
1127ec4f40fdSPierre Jolivet       for (j = 0; j < countB; j++) {
1128ec4f40fdSPierre Jolivet         for (k = 0; k < bs; k++) {
1129ec4f40fdSPierre Jolivet           for (m = 0; m < bs; m++) {
1130ec4f40fdSPierre Jolivet             if (reuse == MAT_INITIAL_MATRIX) {
11319566063dSJacob Faibussowitsch               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
11329566063dSJacob Faibussowitsch               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
1133ec4f40fdSPierre Jolivet             }
1134ec4f40fdSPierre Jolivet             val[jj++] = v2[j * bs2 + m + k * bs];
1135ec4f40fdSPierre Jolivet           }
1136ec4f40fdSPierre Jolivet         }
1137ec4f40fdSPierre Jolivet       }
1138ec4f40fdSPierre Jolivet     } else {
1139397b6df1SKris Buschelman       /* A-part */
1140397b6df1SKris Buschelman       for (j = 0; j < countA; j++) {
1141bccb9932SShri Abhyankar         if (reuse == MAT_INITIAL_MATRIX) {
11429566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
11439566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1144397b6df1SKris Buschelman         }
114516ebf90aSShri Abhyankar         val[jj++] = v1[j];
1146397b6df1SKris Buschelman       }
114716ebf90aSShri Abhyankar 
114816ebf90aSShri Abhyankar       /* B-part */
114916ebf90aSShri Abhyankar       for (j = 0; j < countB; j++) {
1150bccb9932SShri Abhyankar         if (reuse == MAT_INITIAL_MATRIX) {
11519566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
11529566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1153397b6df1SKris Buschelman         }
115416ebf90aSShri Abhyankar         val[jj++] = v2[j];
115516ebf90aSShri Abhyankar       }
115616ebf90aSShri Abhyankar     }
1157ec4f40fdSPierre Jolivet     irow += bs;
1158ec4f40fdSPierre Jolivet   }
11595d955bbbSStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
11603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
116116ebf90aSShri Abhyankar }
116216ebf90aSShri Abhyankar 
116366976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1164d71ae5a4SJacob Faibussowitsch {
116516ebf90aSShri Abhyankar   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
11666497c311SBarry Smith   PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
1167a6053eceSJunchao Zhang   PetscMUMPSInt     *row, *col;
116816ebf90aSShri Abhyankar   const PetscScalar *av, *bv, *v1, *v2;
116916ebf90aSShri Abhyankar   PetscScalar       *val;
1170a3d589ffSStefano Zampini   Mat                Ad, Ao;
1171a3d589ffSStefano Zampini   Mat_SeqAIJ        *aa;
1172a3d589ffSStefano Zampini   Mat_SeqAIJ        *bb;
117316ebf90aSShri Abhyankar 
117416ebf90aSShri Abhyankar   PetscFunctionBegin;
11759566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
11769566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
11779566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1178a3d589ffSStefano Zampini 
117957508eceSPierre Jolivet   aa = (Mat_SeqAIJ *)Ad->data;
118057508eceSPierre Jolivet   bb = (Mat_SeqAIJ *)Ao->data;
118138548759SBarry Smith   ai = aa->i;
118238548759SBarry Smith   aj = aa->j;
118338548759SBarry Smith   bi = bb->i;
118438548759SBarry Smith   bj = bb->j;
118516ebf90aSShri Abhyankar 
1186a3d589ffSStefano Zampini   rstart = A->rmap->rstart;
11875d955bbbSStefano Zampini   cstart = A->cmap->rstart;
11882205254eSKarl Rupp 
1189bccb9932SShri Abhyankar   if (reuse == MAT_INITIAL_MATRIX) {
11906497c311SBarry Smith     nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
11919566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
11929566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &val));
1193a6053eceSJunchao Zhang     mumps->nnz = nz;
1194a6053eceSJunchao Zhang     mumps->irn = row;
1195a6053eceSJunchao Zhang     mumps->jcn = col;
1196a6053eceSJunchao Zhang     mumps->val = mumps->val_alloc = val;
119716ebf90aSShri Abhyankar   } else {
1198a6053eceSJunchao Zhang     val = mumps->val;
119916ebf90aSShri Abhyankar   }
120016ebf90aSShri Abhyankar 
12019371c9d4SSatish Balay   jj   = 0;
12029371c9d4SSatish Balay   irow = rstart;
120316ebf90aSShri Abhyankar   for (i = 0; i < m; i++) {
120416ebf90aSShri Abhyankar     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
120516ebf90aSShri Abhyankar     countA = ai[i + 1] - ai[i];
120616ebf90aSShri Abhyankar     countB = bi[i + 1] - bi[i];
120716ebf90aSShri Abhyankar     bjj    = bj + bi[i];
120816ebf90aSShri Abhyankar     v1     = av + ai[i];
120916ebf90aSShri Abhyankar     v2     = bv + bi[i];
121016ebf90aSShri Abhyankar 
121116ebf90aSShri Abhyankar     /* A-part */
121216ebf90aSShri Abhyankar     for (j = 0; j < countA; j++) {
1213bccb9932SShri Abhyankar       if (reuse == MAT_INITIAL_MATRIX) {
12149566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
12155d955bbbSStefano Zampini         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
121616ebf90aSShri Abhyankar       }
121716ebf90aSShri Abhyankar       val[jj++] = v1[j];
121816ebf90aSShri Abhyankar     }
121916ebf90aSShri Abhyankar 
122016ebf90aSShri Abhyankar     /* B-part */
122116ebf90aSShri Abhyankar     for (j = 0; j < countB; j++) {
1222bccb9932SShri Abhyankar       if (reuse == MAT_INITIAL_MATRIX) {
12239566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
12249566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
122516ebf90aSShri Abhyankar       }
122616ebf90aSShri Abhyankar       val[jj++] = v2[j];
122716ebf90aSShri Abhyankar     }
122816ebf90aSShri Abhyankar     irow++;
122916ebf90aSShri Abhyankar   }
12309566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
12319566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
12323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
123316ebf90aSShri Abhyankar }
123416ebf90aSShri Abhyankar 
123566976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1236d71ae5a4SJacob Faibussowitsch {
123767877ebaSShri Abhyankar   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
1238f4f49eeaSPierre Jolivet   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
1239f4f49eeaSPierre Jolivet   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
124067877ebaSShri Abhyankar   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
12415d955bbbSStefano Zampini   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
124233d57670SJed Brown   const PetscInt     bs2 = mat->bs2;
1243a6053eceSJunchao Zhang   PetscInt           bs;
12446497c311SBarry Smith   PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
1245a6053eceSJunchao Zhang   PetscMUMPSInt     *row, *col;
124667877ebaSShri Abhyankar   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
124767877ebaSShri Abhyankar   PetscScalar       *val;
124867877ebaSShri Abhyankar 
124967877ebaSShri Abhyankar   PetscFunctionBegin;
12509566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(A, &bs));
1251bccb9932SShri Abhyankar   if (reuse == MAT_INITIAL_MATRIX) {
125267877ebaSShri Abhyankar     nz = bs2 * (aa->nz + bb->nz);
12539566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
12549566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &val));
1255a6053eceSJunchao Zhang     mumps->nnz = nz;
1256a6053eceSJunchao Zhang     mumps->irn = row;
1257a6053eceSJunchao Zhang     mumps->jcn = col;
1258a6053eceSJunchao Zhang     mumps->val = mumps->val_alloc = val;
125967877ebaSShri Abhyankar   } else {
1260a6053eceSJunchao Zhang     val = mumps->val;
126167877ebaSShri Abhyankar   }
126267877ebaSShri Abhyankar 
12639371c9d4SSatish Balay   jj   = 0;
12649371c9d4SSatish Balay   irow = rstart;
126567877ebaSShri Abhyankar   for (i = 0; i < mbs; i++) {
126667877ebaSShri Abhyankar     countA = ai[i + 1] - ai[i];
126767877ebaSShri Abhyankar     countB = bi[i + 1] - bi[i];
126867877ebaSShri Abhyankar     ajj    = aj + ai[i];
126967877ebaSShri Abhyankar     bjj    = bj + bi[i];
127067877ebaSShri Abhyankar     v1     = av + bs2 * ai[i];
127167877ebaSShri Abhyankar     v2     = bv + bs2 * bi[i];
127267877ebaSShri Abhyankar 
127367877ebaSShri Abhyankar     idx = 0;
127467877ebaSShri Abhyankar     /* A-part */
127567877ebaSShri Abhyankar     for (k = 0; k < countA; k++) {
127667877ebaSShri Abhyankar       for (j = 0; j < bs; j++) {
127767877ebaSShri Abhyankar         for (n = 0; n < bs; n++) {
1278bccb9932SShri Abhyankar           if (reuse == MAT_INITIAL_MATRIX) {
12799566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
12805d955bbbSStefano Zampini             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
128167877ebaSShri Abhyankar           }
128267877ebaSShri Abhyankar           val[jj++] = v1[idx++];
128367877ebaSShri Abhyankar         }
128467877ebaSShri Abhyankar       }
128567877ebaSShri Abhyankar     }
128667877ebaSShri Abhyankar 
128767877ebaSShri Abhyankar     idx = 0;
128867877ebaSShri Abhyankar     /* B-part */
128967877ebaSShri Abhyankar     for (k = 0; k < countB; k++) {
129067877ebaSShri Abhyankar       for (j = 0; j < bs; j++) {
129167877ebaSShri Abhyankar         for (n = 0; n < bs; n++) {
1292bccb9932SShri Abhyankar           if (reuse == MAT_INITIAL_MATRIX) {
12939566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
12949566063dSJacob Faibussowitsch             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
129567877ebaSShri Abhyankar           }
1296d985c460SShri Abhyankar           val[jj++] = v2[idx++];
129767877ebaSShri Abhyankar         }
129867877ebaSShri Abhyankar       }
129967877ebaSShri Abhyankar     }
1300d985c460SShri Abhyankar     irow += bs;
130167877ebaSShri Abhyankar   }
13023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
130367877ebaSShri Abhyankar }
130467877ebaSShri Abhyankar 
130566976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1306d71ae5a4SJacob Faibussowitsch {
130716ebf90aSShri Abhyankar   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
13086497c311SBarry Smith   PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1309a6053eceSJunchao Zhang   PetscMUMPSInt     *row, *col;
131016ebf90aSShri Abhyankar   const PetscScalar *av, *bv, *v1, *v2;
131116ebf90aSShri Abhyankar   PetscScalar       *val;
1312a3d589ffSStefano Zampini   Mat                Ad, Ao;
1313a3d589ffSStefano Zampini   Mat_SeqAIJ        *aa;
1314a3d589ffSStefano Zampini   Mat_SeqAIJ        *bb;
131538548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
1316b94d7dedSBarry Smith   PetscBool hermitian, isset;
131738548759SBarry Smith #endif
131816ebf90aSShri Abhyankar 
131916ebf90aSShri Abhyankar   PetscFunctionBegin;
132038548759SBarry Smith #if defined(PETSC_USE_COMPLEX)
1321b94d7dedSBarry Smith   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1322b94d7dedSBarry Smith   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
132338548759SBarry Smith #endif
13249566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
13259566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
13269566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1327a3d589ffSStefano Zampini 
132857508eceSPierre Jolivet   aa = (Mat_SeqAIJ *)Ad->data;
132957508eceSPierre Jolivet   bb = (Mat_SeqAIJ *)Ao->data;
133038548759SBarry Smith   ai = aa->i;
133138548759SBarry Smith   aj = aa->j;
133238548759SBarry Smith   bi = bb->i;
133338548759SBarry Smith   bj = bb->j;
1334421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL));
133516ebf90aSShri Abhyankar   rstart = A->rmap->rstart;
133616ebf90aSShri Abhyankar 
1337bccb9932SShri Abhyankar   if (reuse == MAT_INITIAL_MATRIX) {
1338e0bace9bSHong Zhang     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
1339e0bace9bSHong Zhang     nzb = 0; /* num of upper triangular entries in mat->B */
134016ebf90aSShri Abhyankar     for (i = 0; i < m; i++) {
1341e0bace9bSHong Zhang       nza += (ai[i + 1] - adiag[i]);
134216ebf90aSShri Abhyankar       countB = bi[i + 1] - bi[i];
134316ebf90aSShri Abhyankar       bjj    = bj + bi[i];
1344e0bace9bSHong Zhang       for (j = 0; j < countB; j++) {
1345e0bace9bSHong Zhang         if (garray[bjj[j]] > rstart) nzb++;
1346e0bace9bSHong Zhang       }
1347e0bace9bSHong Zhang     }
134816ebf90aSShri Abhyankar 
1349e0bace9bSHong Zhang     nz = nza + nzb; /* total nz of upper triangular part of mat */
13509566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nz, &row, nz, &col));
13519566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &val));
1352a6053eceSJunchao Zhang     mumps->nnz = nz;
1353a6053eceSJunchao Zhang     mumps->irn = row;
1354a6053eceSJunchao Zhang     mumps->jcn = col;
1355a6053eceSJunchao Zhang     mumps->val = mumps->val_alloc = val;
135616ebf90aSShri Abhyankar   } else {
1357a6053eceSJunchao Zhang     val = mumps->val;
135816ebf90aSShri Abhyankar   }
135916ebf90aSShri Abhyankar 
13609371c9d4SSatish Balay   jj   = 0;
13619371c9d4SSatish Balay   irow = rstart;
136216ebf90aSShri Abhyankar   for (i = 0; i < m; i++) {
136316ebf90aSShri Abhyankar     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
136416ebf90aSShri Abhyankar     v1     = av + adiag[i];
136516ebf90aSShri Abhyankar     countA = ai[i + 1] - adiag[i];
136616ebf90aSShri Abhyankar     countB = bi[i + 1] - bi[i];
136716ebf90aSShri Abhyankar     bjj    = bj + bi[i];
136816ebf90aSShri Abhyankar     v2     = bv + bi[i];
136916ebf90aSShri Abhyankar 
137016ebf90aSShri Abhyankar     /* A-part */
137116ebf90aSShri Abhyankar     for (j = 0; j < countA; j++) {
1372bccb9932SShri Abhyankar       if (reuse == MAT_INITIAL_MATRIX) {
13739566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
13749566063dSJacob Faibussowitsch         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
137516ebf90aSShri Abhyankar       }
137616ebf90aSShri Abhyankar       val[jj++] = v1[j];
137716ebf90aSShri Abhyankar     }
137816ebf90aSShri Abhyankar 
137916ebf90aSShri Abhyankar     /* B-part */
138016ebf90aSShri Abhyankar     for (j = 0; j < countB; j++) {
138116ebf90aSShri Abhyankar       if (garray[bjj[j]] > rstart) {
1382bccb9932SShri Abhyankar         if (reuse == MAT_INITIAL_MATRIX) {
13839566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
13849566063dSJacob Faibussowitsch           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
138516ebf90aSShri Abhyankar         }
138616ebf90aSShri Abhyankar         val[jj++] = v2[j];
138716ebf90aSShri Abhyankar       }
1388397b6df1SKris Buschelman     }
1389397b6df1SKris Buschelman     irow++;
1390397b6df1SKris Buschelman   }
13919566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
13929566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
13933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1394397b6df1SKris Buschelman }
1395397b6df1SKris Buschelman 
1396d2a308c1SPierre Jolivet static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1397c3e1b152SPierre Jolivet {
1398c3e1b152SPierre Jolivet   const PetscScalar *av;
1399c3e1b152SPierre Jolivet   const PetscInt     M = A->rmap->n;
14006497c311SBarry Smith   PetscCount         i;
1401c3e1b152SPierre Jolivet   PetscMUMPSInt     *row, *col;
1402c3e1b152SPierre Jolivet   Vec                v;
1403c3e1b152SPierre Jolivet 
1404c3e1b152SPierre Jolivet   PetscFunctionBegin;
1405c3e1b152SPierre Jolivet   PetscCall(MatDiagonalGetDiagonal(A, &v));
1406c3e1b152SPierre Jolivet   PetscCall(VecGetArrayRead(v, &av));
1407c3e1b152SPierre Jolivet   if (reuse == MAT_INITIAL_MATRIX) {
1408c3e1b152SPierre Jolivet     PetscCall(PetscMalloc2(M, &row, M, &col));
1409c3e1b152SPierre Jolivet     for (i = 0; i < M; i++) {
1410c3e1b152SPierre Jolivet       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1411c3e1b152SPierre Jolivet       col[i] = row[i];
1412c3e1b152SPierre Jolivet     }
1413c3e1b152SPierre Jolivet     mumps->val = (PetscScalar *)av;
1414c3e1b152SPierre Jolivet     mumps->irn = row;
1415c3e1b152SPierre Jolivet     mumps->jcn = col;
1416c3e1b152SPierre Jolivet     mumps->nnz = M;
1417127cd276SPierre Jolivet   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1418127cd276SPierre Jolivet   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1419c3e1b152SPierre Jolivet   PetscCall(VecRestoreArrayRead(v, &av));
1420c3e1b152SPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
1421c3e1b152SPierre Jolivet }
1422c3e1b152SPierre Jolivet 
1423d2a308c1SPierre Jolivet static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
14244b9405b2SPierre Jolivet {
14254b9405b2SPierre Jolivet   PetscScalar   *v;
14264b9405b2SPierre Jolivet   const PetscInt m = A->rmap->n, N = A->cmap->N;
14274b9405b2SPierre Jolivet   PetscInt       lda;
14286497c311SBarry Smith   PetscCount     i, j;
14294b9405b2SPierre Jolivet   PetscMUMPSInt *row, *col;
14304b9405b2SPierre Jolivet 
14314b9405b2SPierre Jolivet   PetscFunctionBegin;
14324b9405b2SPierre Jolivet   PetscCall(MatDenseGetArray(A, &v));
14334b9405b2SPierre Jolivet   PetscCall(MatDenseGetLDA(A, &lda));
14344b9405b2SPierre Jolivet   if (reuse == MAT_INITIAL_MATRIX) {
14354b9405b2SPierre Jolivet     PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
14364b9405b2SPierre Jolivet     for (i = 0; i < m; i++) {
14374b9405b2SPierre Jolivet       col[i] = 0;
14384b9405b2SPierre Jolivet       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
14394b9405b2SPierre Jolivet     }
14404b9405b2SPierre Jolivet     for (j = 1; j < N; j++) {
14414b9405b2SPierre Jolivet       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
14424b9405b2SPierre Jolivet       PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
14434b9405b2SPierre Jolivet     }
14444b9405b2SPierre Jolivet     if (lda == m) mumps->val = v;
14454b9405b2SPierre Jolivet     else {
14464b9405b2SPierre Jolivet       PetscCall(PetscMalloc1(m * N, &mumps->val));
14474b9405b2SPierre Jolivet       mumps->val_alloc = mumps->val;
14484b9405b2SPierre Jolivet       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
14494b9405b2SPierre Jolivet     }
14504b9405b2SPierre Jolivet     mumps->irn = row;
14514b9405b2SPierre Jolivet     mumps->jcn = col;
14524b9405b2SPierre Jolivet     mumps->nnz = m * N;
14534b9405b2SPierre Jolivet   } else {
14544b9405b2SPierre Jolivet     if (lda == m && !mumps->nest_vals) mumps->val = v;
14554b9405b2SPierre Jolivet     else {
14564b9405b2SPierre Jolivet       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
14574b9405b2SPierre Jolivet     }
14584b9405b2SPierre Jolivet   }
14594b9405b2SPierre Jolivet   PetscCall(MatDenseRestoreArray(A, &v));
14604b9405b2SPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
14614b9405b2SPierre Jolivet }
14624b9405b2SPierre Jolivet 
146353587d93SPierre Jolivet // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a
146453587d93SPierre Jolivet // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated
146553587d93SPierre Jolivet // and its rows and columns permuted
146653587d93SPierre Jolivet // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
146753587d93SPierre Jolivet static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
146853587d93SPierre Jolivet {
146953587d93SPierre Jolivet   Mat         A;
147053587d93SPierre Jolivet   PetscScalar s[2];
147153587d93SPierre Jolivet   PetscBool   isTrans, isHTrans, compare;
147253587d93SPierre Jolivet 
147353587d93SPierre Jolivet   PetscFunctionBegin;
147453587d93SPierre Jolivet   do {
147553587d93SPierre Jolivet     PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
147653587d93SPierre Jolivet     if (isTrans) {
147753587d93SPierre Jolivet       PetscCall(MatTransposeGetMat(*sub, &A));
147853587d93SPierre Jolivet       isHTrans = PETSC_FALSE;
147953587d93SPierre Jolivet     } else {
148053587d93SPierre Jolivet       PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
148153587d93SPierre Jolivet       if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
148253587d93SPierre Jolivet     }
148353587d93SPierre Jolivet     compare = (PetscBool)(isTrans || isHTrans);
148453587d93SPierre Jolivet     if (compare) {
148553587d93SPierre Jolivet       if (vshift && vscale) {
148653587d93SPierre Jolivet         PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
148753587d93SPierre Jolivet         if (!*conjugate) {
148853587d93SPierre Jolivet           *vshift += s[0] * *vscale;
148953587d93SPierre Jolivet           *vscale *= s[1];
149053587d93SPierre Jolivet         } else {
149153587d93SPierre Jolivet           *vshift += PetscConj(s[0]) * *vscale;
149253587d93SPierre Jolivet           *vscale *= PetscConj(s[1]);
149353587d93SPierre Jolivet         }
149453587d93SPierre Jolivet       }
149553587d93SPierre Jolivet       if (swap) *swap = (PetscBool)!*swap;
149653587d93SPierre Jolivet       if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
149753587d93SPierre Jolivet       *sub = A;
149853587d93SPierre Jolivet     }
149953587d93SPierre Jolivet   } while (compare);
150053587d93SPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
150153587d93SPierre Jolivet }
150253587d93SPierre Jolivet 
150366976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
15049d0448ceSStefano Zampini {
15059d0448ceSStefano Zampini   Mat     **mats;
15069d0448ceSStefano Zampini   PetscInt  nr, nc;
15079d0448ceSStefano Zampini   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
15089d0448ceSStefano Zampini 
15099d0448ceSStefano Zampini   PetscFunctionBegin;
15109d0448ceSStefano Zampini   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
15119d0448ceSStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
15129d0448ceSStefano Zampini     PetscMUMPSInt *irns, *jcns;
15139d0448ceSStefano Zampini     PetscScalar   *vals;
15146497c311SBarry Smith     PetscCount     totnnz, cumnnz, maxnnz;
151593d70b8aSPierre Jolivet     PetscInt      *pjcns_w, Mbs = 0;
15169d0448ceSStefano Zampini     IS            *rows, *cols;
15179d0448ceSStefano Zampini     PetscInt     **rows_idx, **cols_idx;
15189d0448ceSStefano Zampini 
15199d0448ceSStefano Zampini     cumnnz = 0;
15209d0448ceSStefano Zampini     maxnnz = 0;
15215d955bbbSStefano Zampini     PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
15229d0448ceSStefano Zampini     for (PetscInt r = 0; r < nr; r++) {
15239d0448ceSStefano Zampini       for (PetscInt c = 0; c < nc; c++) {
15249d0448ceSStefano Zampini         Mat sub = mats[r][c];
15259d0448ceSStefano Zampini 
15269d0448ceSStefano Zampini         mumps->nest_convert_to_triples[r * nc + c] = NULL;
15279d0448ceSStefano Zampini         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
15289d0448ceSStefano Zampini         if (sub) {
15299d0448ceSStefano Zampini           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
153053587d93SPierre Jolivet           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
15319d0448ceSStefano Zampini           MatInfo   info;
15329d0448ceSStefano Zampini 
153353587d93SPierre Jolivet           PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
15349d0448ceSStefano Zampini           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
15359d0448ceSStefano Zampini           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
15369d0448ceSStefano Zampini           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
15379d0448ceSStefano Zampini           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
15389d0448ceSStefano Zampini           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
15399d0448ceSStefano Zampini           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1540c3e1b152SPierre Jolivet           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
15414b9405b2SPierre Jolivet           PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
15429d0448ceSStefano Zampini 
15439d0448ceSStefano Zampini           if (chol) {
15449d0448ceSStefano Zampini             if (r == c) {
15459d0448ceSStefano Zampini               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
15469d0448ceSStefano Zampini               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
15479d0448ceSStefano Zampini               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
15489d0448ceSStefano Zampini               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1549c3e1b152SPierre Jolivet               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
15504b9405b2SPierre Jolivet               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
15519d0448ceSStefano Zampini             } else {
15529d0448ceSStefano Zampini               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
15539d0448ceSStefano Zampini               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
15549d0448ceSStefano Zampini               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
15559d0448ceSStefano Zampini               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1556c3e1b152SPierre Jolivet               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
15574b9405b2SPierre Jolivet               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
15589d0448ceSStefano Zampini             }
15599d0448ceSStefano Zampini           } else {
15609d0448ceSStefano Zampini             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
15619d0448ceSStefano Zampini             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
15629d0448ceSStefano Zampini             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
15639d0448ceSStefano Zampini             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1564c3e1b152SPierre Jolivet             else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
15654b9405b2SPierre Jolivet             else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
15669d0448ceSStefano Zampini           }
15679d0448ceSStefano Zampini           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
15689d0448ceSStefano Zampini           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
15699d0448ceSStefano Zampini           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
15706497c311SBarry Smith           cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
15719d0448ceSStefano Zampini           maxnnz = PetscMax(maxnnz, info.nz_used);
15729d0448ceSStefano Zampini         }
15739d0448ceSStefano Zampini       }
15749d0448ceSStefano Zampini     }
15759d0448ceSStefano Zampini 
15769d0448ceSStefano Zampini     /* Allocate total COO */
15779d0448ceSStefano Zampini     totnnz = cumnnz;
15789d0448ceSStefano Zampini     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
15799d0448ceSStefano Zampini     PetscCall(PetscMalloc1(totnnz, &vals));
15809d0448ceSStefano Zampini 
15819d0448ceSStefano Zampini     /* Handle rows and column maps
15829d0448ceSStefano Zampini        We directly map rows and use an SF for the columns */
15839d0448ceSStefano Zampini     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
15849d0448ceSStefano Zampini     PetscCall(MatNestGetISs(A, rows, cols));
15859d0448ceSStefano Zampini     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
15869d0448ceSStefano Zampini     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
15879d0448ceSStefano Zampini     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
15885d955bbbSStefano Zampini     else (void)maxnnz;
15899d0448ceSStefano Zampini 
15909d0448ceSStefano Zampini     cumnnz = 0;
15919d0448ceSStefano Zampini     for (PetscInt r = 0; r < nr; r++) {
15929d0448ceSStefano Zampini       for (PetscInt c = 0; c < nc; c++) {
15939d0448ceSStefano Zampini         Mat             sub    = mats[r][c];
15949d0448ceSStefano Zampini         const PetscInt *ridx   = rows_idx[r];
15955d955bbbSStefano Zampini         const PetscInt *cidx   = cols_idx[c];
159653587d93SPierre Jolivet         PetscScalar     vscale = 1.0, vshift = 0.0;
159793d70b8aSPierre Jolivet         PetscInt        rst, size, bs;
15989d0448ceSStefano Zampini         PetscSF         csf;
159953587d93SPierre Jolivet         PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
16005d955bbbSStefano Zampini         PetscLayout     cmap;
16016497c311SBarry Smith         PetscInt        innz;
16029d0448ceSStefano Zampini 
16039d0448ceSStefano Zampini         mumps->nest_vals_start[r * nc + c] = cumnnz;
160493d70b8aSPierre Jolivet         if (c == r) {
160593d70b8aSPierre Jolivet           PetscCall(ISGetSize(rows[r], &size));
160693d70b8aSPierre Jolivet           if (!mumps->nest_convert_to_triples[r * nc + c]) {
160793d70b8aSPierre Jolivet             for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
160893d70b8aSPierre Jolivet           }
160993d70b8aSPierre Jolivet           PetscCall(MatGetBlockSize(sub, &bs));
161093d70b8aSPierre Jolivet           Mbs += size / bs;
161193d70b8aSPierre Jolivet         }
16129d0448ceSStefano Zampini         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
16139d0448ceSStefano Zampini 
16145d955bbbSStefano Zampini         /* Extract inner blocks if needed */
161553587d93SPierre Jolivet         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
161653587d93SPierre Jolivet         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
16175d955bbbSStefano Zampini 
16185d955bbbSStefano Zampini         /* Get column layout to map off-process columns */
16195d955bbbSStefano Zampini         PetscCall(MatGetLayouts(sub, NULL, &cmap));
16205d955bbbSStefano Zampini 
16215d955bbbSStefano Zampini         /* Get row start to map on-process rows */
16225d955bbbSStefano Zampini         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
16235d955bbbSStefano Zampini 
16249d0448ceSStefano Zampini         /* Directly use the mumps datastructure and use C ordering for now */
16259d0448ceSStefano Zampini         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
16269d0448ceSStefano Zampini 
16275d955bbbSStefano Zampini         /* Swap the role of rows and columns indices for transposed blocks
16285d955bbbSStefano Zampini            since we need values with global final ordering */
16295d955bbbSStefano Zampini         if (swap) {
16305d955bbbSStefano Zampini           cidx = rows_idx[r];
16315d955bbbSStefano Zampini           ridx = cols_idx[c];
16329d0448ceSStefano Zampini         }
16339d0448ceSStefano Zampini 
16345d955bbbSStefano Zampini         /* Communicate column indices
16355d955bbbSStefano Zampini            This could have been done with a single SF but it would have complicated the code a lot.
16365d955bbbSStefano Zampini            But since we do it only once, we pay the price of setting up an SF for each block */
16375d955bbbSStefano Zampini         if (PetscDefined(USE_64BIT_INDICES)) {
16385d955bbbSStefano Zampini           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1639f4f49eeaSPierre Jolivet         } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
16409d0448ceSStefano Zampini         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
16416497c311SBarry Smith         PetscCall(PetscIntCast(mumps->nnz, &innz));
16426497c311SBarry Smith         PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
16435d955bbbSStefano Zampini         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
16445d955bbbSStefano Zampini         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
16459d0448ceSStefano Zampini         PetscCall(PetscSFDestroy(&csf));
16469d0448ceSStefano Zampini 
16475d955bbbSStefano Zampini         /* Import indices: use direct map for rows and mapped indices for columns */
16485d955bbbSStefano Zampini         if (swap) {
16495d955bbbSStefano Zampini           for (PetscInt k = 0; k < mumps->nnz; k++) {
16505d955bbbSStefano Zampini             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
16515d955bbbSStefano Zampini             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
16525d955bbbSStefano Zampini           }
16535d955bbbSStefano Zampini         } else {
16545d955bbbSStefano Zampini           for (PetscInt k = 0; k < mumps->nnz; k++) {
16555d955bbbSStefano Zampini             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
16565d955bbbSStefano Zampini             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
16575d955bbbSStefano Zampini           }
16585d955bbbSStefano Zampini         }
16595d955bbbSStefano Zampini 
16605d955bbbSStefano Zampini         /* Import values to full COO */
166153587d93SPierre Jolivet         if (conjugate) { /* conjugate the entries */
166250c845baSStefano Zampini           PetscScalar *v = vals + cumnnz;
166353587d93SPierre Jolivet           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
166453587d93SPierre Jolivet         } else if (vscale != 1.0) {
166553587d93SPierre Jolivet           PetscScalar *v = vals + cumnnz;
166653587d93SPierre Jolivet           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
166753587d93SPierre Jolivet         } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
16689d0448ceSStefano Zampini 
16699d0448ceSStefano Zampini         /* Shift new starting point and sanity check */
16709d0448ceSStefano Zampini         cumnnz += mumps->nnz;
16716497c311SBarry Smith         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
16729d0448ceSStefano Zampini 
16739d0448ceSStefano Zampini         /* Free scratch memory */
16749d0448ceSStefano Zampini         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
16759d0448ceSStefano Zampini         PetscCall(PetscFree(mumps->val_alloc));
16769d0448ceSStefano Zampini         mumps->val = NULL;
16779d0448ceSStefano Zampini         mumps->nnz = 0;
16789d0448ceSStefano Zampini       }
16799d0448ceSStefano Zampini     }
168093d70b8aSPierre Jolivet     if (mumps->id.ICNTL(15) == 1) {
168193d70b8aSPierre Jolivet       if (Mbs != A->rmap->N) {
168293d70b8aSPierre Jolivet         PetscMPIInt rank, size;
168393d70b8aSPierre Jolivet 
168493d70b8aSPierre Jolivet         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
168593d70b8aSPierre Jolivet         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
168693d70b8aSPierre Jolivet         if (rank == 0) {
168793d70b8aSPierre Jolivet           PetscInt shift = 0;
168893d70b8aSPierre Jolivet 
168993d70b8aSPierre Jolivet           PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
169093d70b8aSPierre Jolivet           PetscCall(PetscFree(mumps->id.blkptr));
169193d70b8aSPierre Jolivet           PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
169293d70b8aSPierre Jolivet           mumps->id.blkptr[0] = 1;
169393d70b8aSPierre Jolivet           for (PetscInt i = 0; i < size; ++i) {
169493d70b8aSPierre Jolivet             for (PetscInt r = 0; r < nr; r++) {
169593d70b8aSPierre Jolivet               Mat             sub = mats[r][r];
169693d70b8aSPierre Jolivet               const PetscInt *ranges;
169793d70b8aSPierre Jolivet               PetscInt        bs;
169893d70b8aSPierre Jolivet 
169993d70b8aSPierre Jolivet               for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
170093d70b8aSPierre Jolivet               PetscCall(MatGetOwnershipRanges(sub, &ranges));
170193d70b8aSPierre Jolivet               PetscCall(MatGetBlockSize(sub, &bs));
170293d70b8aSPierre Jolivet               for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
170393d70b8aSPierre Jolivet               shift += (ranges[i + 1] - ranges[i]) / bs;
170493d70b8aSPierre Jolivet             }
170593d70b8aSPierre Jolivet           }
170693d70b8aSPierre Jolivet         }
170793d70b8aSPierre Jolivet       } else mumps->id.ICNTL(15) = 0;
170893d70b8aSPierre Jolivet     }
17099d0448ceSStefano Zampini     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
17109d0448ceSStefano Zampini     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
17119d0448ceSStefano Zampini     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
17129d0448ceSStefano Zampini     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
17136497c311SBarry Smith     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
17145d955bbbSStefano Zampini     mumps->nest_vals_start[nr * nc] = cumnnz;
17159d0448ceSStefano Zampini 
17169d0448ceSStefano Zampini     /* Set pointers for final MUMPS data structure */
17179d0448ceSStefano Zampini     mumps->nest_vals = vals;
17189d0448ceSStefano Zampini     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
17199d0448ceSStefano Zampini     mumps->val       = vals;
17209d0448ceSStefano Zampini     mumps->irn       = irns;
17219d0448ceSStefano Zampini     mumps->jcn       = jcns;
17229d0448ceSStefano Zampini     mumps->nnz       = cumnnz;
17239d0448ceSStefano Zampini   } else {
17249d0448ceSStefano Zampini     PetscScalar *oval = mumps->nest_vals;
17259d0448ceSStefano Zampini     for (PetscInt r = 0; r < nr; r++) {
17269d0448ceSStefano Zampini       for (PetscInt c = 0; c < nc; c++) {
172753587d93SPierre Jolivet         PetscBool   conjugate = PETSC_FALSE;
17285d955bbbSStefano Zampini         Mat         sub       = mats[r][c];
172953587d93SPierre Jolivet         PetscScalar vscale = 1.0, vshift = 0.0;
17305d955bbbSStefano Zampini         PetscInt    midx = r * nc + c;
17315d955bbbSStefano Zampini 
17325d955bbbSStefano Zampini         if (!mumps->nest_convert_to_triples[midx]) continue;
173353587d93SPierre Jolivet         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
173453587d93SPierre Jolivet         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
17355d955bbbSStefano Zampini         mumps->val = oval + mumps->nest_vals_start[midx];
17365d955bbbSStefano Zampini         PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
173753587d93SPierre Jolivet         if (conjugate) {
17386497c311SBarry Smith           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
173953587d93SPierre Jolivet           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
174053587d93SPierre Jolivet         } else if (vscale != 1.0) {
174153587d93SPierre Jolivet           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
174253587d93SPierre Jolivet           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
17435d955bbbSStefano Zampini         }
17449d0448ceSStefano Zampini       }
17459d0448ceSStefano Zampini     }
17469d0448ceSStefano Zampini     mumps->val = oval;
17479d0448ceSStefano Zampini   }
17489d0448ceSStefano Zampini   PetscFunctionReturn(PETSC_SUCCESS);
17499d0448ceSStefano Zampini }
17509d0448ceSStefano Zampini 
175166976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MUMPS(Mat A)
1752d71ae5a4SJacob Faibussowitsch {
1753a6053eceSJunchao Zhang   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1754b24902e0SBarry Smith 
1755397b6df1SKris Buschelman   PetscFunctionBegin;
1756cf053153SJunchao Zhang   PetscCall(PetscFree(mumps->id.isol_loc));
17579566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
17589566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&mumps->scat_sol));
17599566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mumps->b_seq));
17609566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mumps->x_seq));
17619566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->id.perm_in));
176293d70b8aSPierre Jolivet   PetscCall(PetscFree(mumps->id.blkvar));
176393d70b8aSPierre Jolivet   PetscCall(PetscFree(mumps->id.blkptr));
17649566063dSJacob Faibussowitsch   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
17659566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->val_alloc));
17669566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->info));
1767413bcc21SPierre Jolivet   PetscCall(PetscFree(mumps->ICNTL_pre));
1768413bcc21SPierre Jolivet   PetscCall(PetscFree(mumps->CNTL_pre));
17699566063dSJacob Faibussowitsch   PetscCall(MatMumpsResetSchur_Private(mumps));
1770413bcc21SPierre Jolivet   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1771a5e57a09SHong Zhang     mumps->id.job = JOB_END;
17723ab56b82SJunchao Zhang     PetscMUMPS_c(mumps);
17739261f6e4SBarry Smith     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1774413bcc21SPierre Jolivet     if (mumps->mumps_comm != MPI_COMM_NULL) {
1775413bcc21SPierre Jolivet       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1776413bcc21SPierre Jolivet       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1777413bcc21SPierre Jolivet     }
1778413bcc21SPierre Jolivet   }
1779cf053153SJunchao Zhang   PetscCall(MatMumpsFreeInternalID(&mumps->id));
17803ab56b82SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT)
178167602552SJunchao Zhang   if (mumps->use_petsc_omp_support) {
17829566063dSJacob Faibussowitsch     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
17839566063dSJacob Faibussowitsch     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
17849566063dSJacob Faibussowitsch     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
178567602552SJunchao Zhang   }
17863ab56b82SJunchao Zhang #endif
17879566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->ia_alloc));
17889566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->ja_alloc));
17899566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->recvcount));
17909566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->reqs));
17919566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->irhs_loc));
17929d0448ceSStefano Zampini   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
17939d0448ceSStefano Zampini   PetscCall(PetscFree(mumps->nest_vals));
17949566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1795bf0cc555SLisandro Dalcin 
179697969023SHong Zhang   /* clear composed functions */
17979566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
17989566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
17999566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
18009566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
18019566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
18029566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
18039566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
18049566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
18059566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
18069566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
18079566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
18085c0bae8cSAshish Patel   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
18099566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
18109566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
181193d70b8aSPierre Jolivet   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
18123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1813397b6df1SKris Buschelman }
1814397b6df1SKris Buschelman 
181567602552SJunchao Zhang /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1816d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1817d71ae5a4SJacob Faibussowitsch {
181867602552SJunchao Zhang   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
181967602552SJunchao Zhang   const PetscMPIInt ompsize = mumps->omp_comm_size;
182067602552SJunchao Zhang   PetscInt          i, m, M, rstart;
182167602552SJunchao Zhang 
182267602552SJunchao Zhang   PetscFunctionBegin;
18239566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &M, NULL));
18249566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &m, NULL));
182508401ef6SPierre Jolivet   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
182667602552SJunchao Zhang   if (ompsize == 1) {
182767602552SJunchao Zhang     if (!mumps->irhs_loc) {
18286497c311SBarry Smith       mumps->nloc_rhs = (PetscMUMPSInt)m;
18299566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
18309566063dSJacob Faibussowitsch       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
18316497c311SBarry Smith       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
183267602552SJunchao Zhang     }
1833cf053153SJunchao Zhang     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
183467602552SJunchao Zhang   } else {
183567602552SJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT)
183667602552SJunchao Zhang     const PetscInt *ranges;
183767602552SJunchao Zhang     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
183867602552SJunchao Zhang     MPI_Group       petsc_group, omp_group;
183967602552SJunchao Zhang     PetscScalar    *recvbuf = NULL;
184067602552SJunchao Zhang 
184167602552SJunchao Zhang     if (mumps->is_omp_master) {
184267602552SJunchao Zhang       /* Lazily initialize the omp stuff for distributed rhs */
184367602552SJunchao Zhang       if (!mumps->irhs_loc) {
18449566063dSJacob Faibussowitsch         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
18459566063dSJacob Faibussowitsch         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
18469566063dSJacob Faibussowitsch         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
18479566063dSJacob Faibussowitsch         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
184867602552SJunchao Zhang         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
18499566063dSJacob Faibussowitsch         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
185067602552SJunchao Zhang 
185167602552SJunchao Zhang         /* Populate mumps->irhs_loc[], rhs_nrow[] */
185267602552SJunchao Zhang         mumps->nloc_rhs = 0;
18539566063dSJacob Faibussowitsch         PetscCall(MatGetOwnershipRanges(A, &ranges));
185467602552SJunchao Zhang         for (j = 0; j < ompsize; j++) {
185567602552SJunchao Zhang           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
185667602552SJunchao Zhang           mumps->nloc_rhs += mumps->rhs_nrow[j];
185767602552SJunchao Zhang         }
18589566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
185967602552SJunchao Zhang         for (j = k = 0; j < ompsize; j++) {
1860407b358cSPierre Jolivet           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
186167602552SJunchao Zhang         }
186267602552SJunchao Zhang 
18639566063dSJacob Faibussowitsch         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
18649566063dSJacob Faibussowitsch         PetscCallMPI(MPI_Group_free(&petsc_group));
18659566063dSJacob Faibussowitsch         PetscCallMPI(MPI_Group_free(&omp_group));
186667602552SJunchao Zhang       }
186767602552SJunchao Zhang 
186867602552SJunchao Zhang       /* Realloc buffers when current nrhs is bigger than what we have met */
186967602552SJunchao Zhang       if (nrhs > mumps->max_nrhs) {
18709566063dSJacob Faibussowitsch         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
18719566063dSJacob Faibussowitsch         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
187267602552SJunchao Zhang         mumps->max_nrhs = nrhs;
187367602552SJunchao Zhang       }
187467602552SJunchao Zhang 
187567602552SJunchao Zhang       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
18769566063dSJacob Faibussowitsch       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
187767602552SJunchao Zhang       mumps->rhs_disps[0] = 0;
187867602552SJunchao Zhang       for (j = 1; j < ompsize; j++) {
187967602552SJunchao Zhang         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
188008401ef6SPierre Jolivet         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
188167602552SJunchao Zhang       }
188267602552SJunchao Zhang       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
188367602552SJunchao Zhang     }
188467602552SJunchao Zhang 
18859566063dSJacob Faibussowitsch     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
18869566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
188767602552SJunchao Zhang 
188867602552SJunchao Zhang     if (mumps->is_omp_master) {
188967602552SJunchao Zhang       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
189067602552SJunchao Zhang         PetscScalar *dst, *dstbase = mumps->rhs_loc;
189167602552SJunchao Zhang         for (j = 0; j < ompsize; j++) {
189267602552SJunchao Zhang           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
189367602552SJunchao Zhang           dst                    = dstbase;
189467602552SJunchao Zhang           for (i = 0; i < nrhs; i++) {
18959566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
189667602552SJunchao Zhang             src += mumps->rhs_nrow[j];
189767602552SJunchao Zhang             dst += mumps->nloc_rhs;
189867602552SJunchao Zhang           }
189967602552SJunchao Zhang           dstbase += mumps->rhs_nrow[j];
190067602552SJunchao Zhang         }
190167602552SJunchao Zhang       }
1902cf053153SJunchao Zhang       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
190367602552SJunchao Zhang     }
190467602552SJunchao Zhang #endif /* PETSC_HAVE_OPENMP_SUPPORT */
190567602552SJunchao Zhang   }
19066497c311SBarry Smith   mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
19076497c311SBarry Smith   mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
190867602552SJunchao Zhang   mumps->id.lrhs_loc = mumps->nloc_rhs;
190967602552SJunchao Zhang   mumps->id.irhs_loc = mumps->irhs_loc;
19103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
191167602552SJunchao Zhang }
191267602552SJunchao Zhang 
191366976f2fSJacob Faibussowitsch static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1914d71ae5a4SJacob Faibussowitsch {
1915e69c285eSBarry Smith   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1916cf053153SJunchao Zhang   const PetscScalar *barray = NULL;
1917d54de34fSKris Buschelman   PetscScalar       *array;
1918329ec9b3SHong Zhang   IS                 is_iden, is_petsc;
1919329ec9b3SHong Zhang   PetscInt           i;
1920cc86f929SStefano Zampini   PetscBool          second_solve = PETSC_FALSE;
1921883f2eb9SBarry Smith   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1922397b6df1SKris Buschelman 
1923397b6df1SKris Buschelman   PetscFunctionBegin;
19249371c9d4SSatish Balay   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
19259371c9d4SSatish Balay                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
19269371c9d4SSatish Balay                                    &cite1));
19279371c9d4SSatish Balay   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
19289371c9d4SSatish Balay                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
19299371c9d4SSatish Balay                                    &cite2));
19302aca8efcSHong Zhang 
1931f480ea8aSBarry Smith   PetscCall(VecFlag(x, A->factorerrortype));
1932603e8f96SBarry Smith   if (A->factorerrortype) {
19339566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
19343ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
19352aca8efcSHong Zhang   }
19362aca8efcSHong Zhang 
1937a5e57a09SHong Zhang   mumps->id.nrhs = 1;
19382d4298aeSJunchao Zhang   if (mumps->petsc_size > 1) {
193925aac85cSJunchao Zhang     if (mumps->ICNTL20 == 10) {
1940cf053153SJunchao Zhang       mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
1941cf053153SJunchao Zhang       PetscCall(VecGetArrayRead(b, &barray));
1942cf053153SJunchao Zhang       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
194325aac85cSJunchao Zhang     } else {
1944cf053153SJunchao Zhang       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/
19459566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
19469566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
194767602552SJunchao Zhang       if (!mumps->myid) {
19489566063dSJacob Faibussowitsch         PetscCall(VecGetArray(mumps->b_seq, &array));
1949cf053153SJunchao Zhang         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
195067602552SJunchao Zhang       }
195125aac85cSJunchao Zhang     }
1952cf053153SJunchao Zhang   } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
1953cf053153SJunchao Zhang     mumps->id.ICNTL(20) = 0;
19549566063dSJacob Faibussowitsch     PetscCall(VecCopy(b, x));
19559566063dSJacob Faibussowitsch     PetscCall(VecGetArray(x, &array));
1956cf053153SJunchao Zhang     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1957397b6df1SKris Buschelman   }
1958397b6df1SKris Buschelman 
1959cc86f929SStefano Zampini   /*
1960cc86f929SStefano Zampini      handle condensation step of Schur complement (if any)
1961cc86f929SStefano Zampini      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1962cc86f929SStefano Zampini      According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
1963cc86f929SStefano Zampini      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1964cc86f929SStefano Zampini      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1965cc86f929SStefano Zampini   */
19663e5b40d0SPierre Jolivet   if (mumps->id.size_schur > 0) {
196708401ef6SPierre Jolivet     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
19683e5b40d0SPierre Jolivet     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1969cc86f929SStefano Zampini       second_solve = PETSC_TRUE;
1970cf053153SJunchao Zhang       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
19713e5b40d0SPierre Jolivet       mumps->id.ICNTL(26) = 1;                                /* condensation phase */
19723e5b40d0SPierre Jolivet     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1973cc86f929SStefano Zampini   }
1974cf053153SJunchao Zhang 
1975a5e57a09SHong Zhang   mumps->id.job = JOB_SOLVE;
1976cf053153SJunchao Zhang   PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
1977cf053153SJunchao Zhang   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
1978397b6df1SKris Buschelman 
1979b5fa320bSStefano Zampini   /* handle expansion step of Schur complement (if any) */
19801baa6e33SBarry Smith   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1981cf053153SJunchao Zhang   else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
19823e5b40d0SPierre Jolivet     PetscCall(MatMumpsSolveSchur_Private(A));
1983cf053153SJunchao Zhang     for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
19843e5b40d0SPierre Jolivet   }
1985b5fa320bSStefano Zampini 
1986f0b74427SPierre Jolivet   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
1987a5e57a09SHong Zhang     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1988cf053153SJunchao Zhang       /* when id.ICNTL(9) changes, the contents of ilsol_loc may change (not its size, lsol_loc), recreates scat_sol */
19899566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1990397b6df1SKris Buschelman     }
1991a5e57a09SHong Zhang     if (!mumps->scat_sol) { /* create scatter scat_sol */
1992a6053eceSJunchao Zhang       PetscInt *isol2_loc = NULL;
19939566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
19949566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1995a6053eceSJunchao Zhang       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
19969566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
19979566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
19989566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&is_iden));
19999566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&is_petsc));
2000a5e57a09SHong Zhang       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
2001397b6df1SKris Buschelman     }
2002a5e57a09SHong Zhang 
2003cf053153SJunchao Zhang     PetscScalar *xarray;
2004cf053153SJunchao Zhang     PetscCall(VecGetArray(mumps->x_seq, &xarray));
2005cf053153SJunchao Zhang     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
2006cf053153SJunchao Zhang     PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
20079566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
20089566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2009353d7d71SJunchao Zhang 
2010cf053153SJunchao Zhang     if (mumps->ICNTL20 == 10) { // distributed RHS
2011cf053153SJunchao Zhang       PetscCall(VecRestoreArrayRead(b, &barray));
2012cf053153SJunchao Zhang     } else if (!mumps->myid) { // centralized RHS
20139566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mumps->b_seq, &array));
201425aac85cSJunchao Zhang     }
2015cf053153SJunchao Zhang   } else {
2016cf053153SJunchao Zhang     // id.rhs has the solution in mumps precision
2017cf053153SJunchao Zhang     PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
2018cf053153SJunchao Zhang     PetscCall(VecRestoreArray(x, &array));
2019cf053153SJunchao Zhang   }
2020353d7d71SJunchao Zhang 
202164412097SPierre Jolivet   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
20223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2023397b6df1SKris Buschelman }
2024397b6df1SKris Buschelman 
202566976f2fSJacob Faibussowitsch static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2026d71ae5a4SJacob Faibussowitsch {
2027e69c285eSBarry Smith   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2028338d3105SPierre Jolivet   const PetscMUMPSInt value = mumps->id.ICNTL(9);
202951d5961aSHong Zhang 
203051d5961aSHong Zhang   PetscFunctionBegin;
2031a5e57a09SHong Zhang   mumps->id.ICNTL(9) = 0;
20329566063dSJacob Faibussowitsch   PetscCall(MatSolve_MUMPS(A, b, x));
2033338d3105SPierre Jolivet   mumps->id.ICNTL(9) = value;
20343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
203551d5961aSHong Zhang }
203651d5961aSHong Zhang 
203766976f2fSJacob Faibussowitsch static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
2038d71ae5a4SJacob Faibussowitsch {
2039b8491c3eSStefano Zampini   Mat                Bt = NULL;
2040a6053eceSJunchao Zhang   PetscBool          denseX, denseB, flg, flgT;
2041e69c285eSBarry Smith   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
2042917c3dccSPierre Jolivet   PetscInt           i, nrhs, M, nrhsM;
20431683a169SBarry Smith   PetscScalar       *array;
2044cf053153SJunchao Zhang   const PetscScalar *barray;
2045a6053eceSJunchao Zhang   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
2046a6053eceSJunchao Zhang   PetscMUMPSInt     *isol_loc, *isol_loc_save;
2047cf053153SJunchao Zhang   PetscScalar       *sol_loc;
2048cf053153SJunchao Zhang   void              *sol_loc_save;
2049cf053153SJunchao Zhang   PetscCount         sol_loc_len_save;
2050be818407SHong Zhang   IS                 is_to, is_from;
2051beae5ec0SHong Zhang   PetscInt           k, proc, j, m, myrstart;
2052be818407SHong Zhang   const PetscInt    *rstart;
205367602552SJunchao Zhang   Vec                v_mpi, msol_loc;
205467602552SJunchao Zhang   VecScatter         scat_sol;
205567602552SJunchao Zhang   Vec                b_seq;
205667602552SJunchao Zhang   VecScatter         scat_rhs;
2057be818407SHong Zhang   PetscScalar       *aa;
2058be818407SHong Zhang   PetscInt           spnr, *ia, *ja;
2059d56c302dSHong Zhang   Mat_MPIAIJ        *b = NULL;
2060bda8bf91SBarry Smith 
2061e0b74bf9SHong Zhang   PetscFunctionBegin;
20629566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
206328b400f6SJacob Faibussowitsch   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
2064be818407SHong Zhang 
20659566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
2066cf053153SJunchao Zhang 
2067a6053eceSJunchao Zhang   if (denseB) {
206808401ef6SPierre Jolivet     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
2069be818407SHong Zhang     mumps->id.ICNTL(20) = 0; /* dense RHS */
20700e6b8875SHong Zhang   } else {                   /* sparse B */
207108401ef6SPierre Jolivet     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
2072013e2dc7SBarry Smith     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
207353587d93SPierre Jolivet     PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
207453587d93SPierre Jolivet     PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
207553587d93SPierre Jolivet     /* input B is transpose of actual RHS matrix,
20760e6b8875SHong Zhang      because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
20779566063dSJacob Faibussowitsch     PetscCall(MatTransposeGetMat(B, &Bt));
2078be818407SHong Zhang     mumps->id.ICNTL(20) = 1; /* sparse RHS */
2079b8491c3eSStefano Zampini   }
208087b22cf4SHong Zhang 
20819566063dSJacob Faibussowitsch   PetscCall(MatGetSize(B, &M, &nrhs));
2082917c3dccSPierre Jolivet   PetscCall(PetscIntMultError(nrhs, M, &nrhsM));
20836497c311SBarry Smith   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
20846497c311SBarry Smith   mumps->id.lrhs = (PetscMUMPSInt)M;
20859481e6e9SHong Zhang 
2086cf053153SJunchao Zhang   if (mumps->petsc_size == 1) { // handle this easy case specially and return early
2087b8491c3eSStefano Zampini     PetscScalar *aa;
2088b8491c3eSStefano Zampini     PetscInt     spnr, *ia, *ja;
2089e94cce23SStefano Zampini     PetscBool    second_solve = PETSC_FALSE;
2090b8491c3eSStefano Zampini 
20919566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(X, &array));
2092a6053eceSJunchao Zhang     if (denseB) {
20932b691707SHong Zhang       /* copy B to X */
2094cf053153SJunchao Zhang       PetscCall(MatDenseGetArrayRead(B, &barray));
2095cf053153SJunchao Zhang       PetscCall(PetscArraycpy(array, barray, nrhsM));
2096cf053153SJunchao Zhang       PetscCall(MatDenseRestoreArrayRead(B, &barray));
20972b691707SHong Zhang     } else { /* sparse B */
20989566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJGetArray(Bt, &aa));
20999566063dSJacob Faibussowitsch       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
210028b400f6SJacob Faibussowitsch       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
21019566063dSJacob Faibussowitsch       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2102cf053153SJunchao Zhang       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2103b8491c3eSStefano Zampini     }
2104cf053153SJunchao Zhang     PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2105cf053153SJunchao Zhang 
2106e94cce23SStefano Zampini     /* handle condensation step of Schur complement (if any) */
21073e5b40d0SPierre Jolivet     if (mumps->id.size_schur > 0) {
21083e5b40d0SPierre Jolivet       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
2109e94cce23SStefano Zampini         second_solve = PETSC_TRUE;
2110cf053153SJunchao Zhang         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
2111cf053153SJunchao Zhang         mumps->id.ICNTL(26) = 1;                                /* condensation phase, i.e, to solve id.redrhs */
21123e5b40d0SPierre Jolivet       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
2113e94cce23SStefano Zampini     }
2114cf053153SJunchao Zhang 
21152cd7d884SHong Zhang     mumps->id.job = JOB_SOLVE;
21163ab56b82SJunchao Zhang     PetscMUMPS_c(mumps);
2117cf053153SJunchao Zhang     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2118b5fa320bSStefano Zampini 
2119b5fa320bSStefano Zampini     /* handle expansion step of Schur complement (if any) */
21201baa6e33SBarry Smith     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
2121cf053153SJunchao Zhang     else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
21223e5b40d0SPierre Jolivet       PetscCall(MatMumpsSolveSchur_Private(A));
21233e5b40d0SPierre Jolivet       for (j = 0; j < nrhs; ++j)
2124cf053153SJunchao Zhang         for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
21253e5b40d0SPierre Jolivet     }
2126cf053153SJunchao Zhang 
2127cf053153SJunchao Zhang     if (!denseB) { /* sparse B, restore ia, ja */
21289566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
21299566063dSJacob Faibussowitsch       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
213028b400f6SJacob Faibussowitsch       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2131b8491c3eSStefano Zampini     }
2132cf053153SJunchao Zhang 
2133cf053153SJunchao Zhang     // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X.
2134cf053153SJunchao Zhang     PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
21359566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(X, &array));
21363ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2137be818407SHong Zhang   }
2138801fbe65SHong Zhang 
21392ef1f0ffSBarry Smith   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
214050a7cd33SPierre Jolivet   PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
2141241dbb5eSStefano Zampini 
2142beae5ec0SHong Zhang   /* create msol_loc to hold mumps local solution */
2143cf053153SJunchao Zhang   isol_loc_save         = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
2144cf053153SJunchao Zhang   sol_loc_save          = mumps->id.sol_loc;
2145cf053153SJunchao Zhang   sol_loc_len_save      = mumps->id.sol_loc_len;
2146cf053153SJunchao Zhang   mumps->id.isol_loc    = NULL; // an init state
2147cf053153SJunchao Zhang   mumps->id.sol_loc     = NULL;
2148cf053153SJunchao Zhang   mumps->id.sol_loc_len = 0;
2149801fbe65SHong Zhang 
2150a1dfcbd9SJunchao Zhang   lsol_loc = mumps->id.lsol_loc;
2151917c3dccSPierre Jolivet   PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
21529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
2153cf053153SJunchao Zhang   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2154801fbe65SHong Zhang   mumps->id.isol_loc = isol_loc;
2155801fbe65SHong Zhang 
21569566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
21572cd7d884SHong Zhang 
215867602552SJunchao Zhang   if (denseB) {
215925aac85cSJunchao Zhang     if (mumps->ICNTL20 == 10) {
216067602552SJunchao Zhang       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
2161cf053153SJunchao Zhang       PetscCall(MatDenseGetArrayRead(B, &barray));
2162cf053153SJunchao Zhang       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
2163cf053153SJunchao Zhang       PetscCall(MatDenseRestoreArrayRead(B, &barray));
21649566063dSJacob Faibussowitsch       PetscCall(MatGetLocalSize(B, &m, NULL));
2165cf053153SJunchao Zhang       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
216625aac85cSJunchao Zhang     } else {
216725aac85cSJunchao Zhang       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
216880577c12SJunchao Zhang       /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
216980577c12SJunchao Zhang         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
217080577c12SJunchao Zhang         0, re-arrange B into desired order, which is a local operation.
217180577c12SJunchao Zhang       */
217280577c12SJunchao Zhang 
217367602552SJunchao Zhang       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
2174be818407SHong Zhang       /* wrap dense rhs matrix B into a vector v_mpi */
21759566063dSJacob Faibussowitsch       PetscCall(MatGetLocalSize(B, &m, NULL));
2176cf053153SJunchao Zhang       PetscCall(MatDenseGetArrayRead(B, &barray));
2177cf053153SJunchao Zhang       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
2178cf053153SJunchao Zhang       PetscCall(MatDenseRestoreArrayRead(B, &barray));
21792b691707SHong Zhang 
2180cf053153SJunchao Zhang       /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
2181801fbe65SHong Zhang       if (!mumps->myid) {
2182beae5ec0SHong Zhang         PetscInt *idx;
2183beae5ec0SHong Zhang         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
2184917c3dccSPierre Jolivet         PetscCall(PetscMalloc1(nrhsM, &idx));
21859566063dSJacob Faibussowitsch         PetscCall(MatGetOwnershipRanges(B, &rstart));
2186917c3dccSPierre Jolivet         for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
2187be818407SHong Zhang           for (j = 0; j < nrhs; j++) {
2188beae5ec0SHong Zhang             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
2189be818407SHong Zhang           }
2190be818407SHong Zhang         }
2191be818407SHong Zhang 
2192917c3dccSPierre Jolivet         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
2193917c3dccSPierre Jolivet         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
2194917c3dccSPierre Jolivet         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
2195801fbe65SHong Zhang       } else {
21969566063dSJacob Faibussowitsch         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
21979566063dSJacob Faibussowitsch         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
21989566063dSJacob Faibussowitsch         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
2199801fbe65SHong Zhang       }
2200cf053153SJunchao Zhang 
22019566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
22029566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
22039566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&is_to));
22049566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&is_from));
22059566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2206801fbe65SHong Zhang 
2207801fbe65SHong Zhang       if (!mumps->myid) { /* define rhs on the host */
2208cf053153SJunchao Zhang         PetscCall(VecGetArrayRead(b_seq, &barray));
2209cf053153SJunchao Zhang         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2210cf053153SJunchao Zhang         PetscCall(VecRestoreArrayRead(b_seq, &barray));
2211801fbe65SHong Zhang       }
221225aac85cSJunchao Zhang     }
22132b691707SHong Zhang   } else { /* sparse B */
22142b691707SHong Zhang     b = (Mat_MPIAIJ *)Bt->data;
22152b691707SHong Zhang 
2216be818407SHong Zhang     /* wrap dense X into a vector v_mpi */
22179566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(X, &m, NULL));
2218cf053153SJunchao Zhang     PetscCall(MatDenseGetArrayRead(X, &barray));
2219cf053153SJunchao Zhang     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
2220cf053153SJunchao Zhang     PetscCall(MatDenseRestoreArrayRead(X, &barray));
22212b691707SHong Zhang 
22222b691707SHong Zhang     if (!mumps->myid) {
22239566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJGetArray(b->A, &aa));
22249566063dSJacob Faibussowitsch       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
222528b400f6SJacob Faibussowitsch       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
22269566063dSJacob Faibussowitsch       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2227cf053153SJunchao Zhang       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
22282b691707SHong Zhang     } else {
22292b691707SHong Zhang       mumps->id.irhs_ptr    = NULL;
22302b691707SHong Zhang       mumps->id.irhs_sparse = NULL;
22312b691707SHong Zhang       mumps->id.nz_rhs      = 0;
2232cf053153SJunchao Zhang       if (mumps->id.rhs_sparse_len) {
2233cf053153SJunchao Zhang         PetscCall(PetscFree(mumps->id.rhs_sparse));
2234cf053153SJunchao Zhang         mumps->id.rhs_sparse_len = 0;
2235cf053153SJunchao Zhang       }
22362b691707SHong Zhang     }
22372b691707SHong Zhang   }
22382b691707SHong Zhang 
2239801fbe65SHong Zhang   /* solve phase */
2240801fbe65SHong Zhang   mumps->id.job = JOB_SOLVE;
22413ab56b82SJunchao Zhang   PetscMUMPS_c(mumps);
22429261f6e4SBarry Smith   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2243801fbe65SHong Zhang 
2244f0b74427SPierre Jolivet   /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
22459566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &array));
22469566063dSJacob Faibussowitsch   PetscCall(VecPlaceArray(v_mpi, array));
2247801fbe65SHong Zhang 
2248334c5f61SHong Zhang   /* create scatter scat_sol */
22499566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(X, &rstart));
2250f0b74427SPierre Jolivet   /* iidx: index for scatter mumps solution to PETSc X */
2251beae5ec0SHong Zhang 
22529566063dSJacob Faibussowitsch   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
22539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
2254beae5ec0SHong Zhang   for (i = 0; i < lsol_loc; i++) {
2255beae5ec0SHong Zhang     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
2256beae5ec0SHong Zhang 
22572d4298aeSJunchao Zhang     for (proc = 0; proc < mumps->petsc_size; proc++) {
2258beae5ec0SHong Zhang       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
2259beae5ec0SHong Zhang         myrstart = rstart[proc];
2260f0b74427SPierre Jolivet         k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
2261f0b74427SPierre Jolivet         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
2262beae5ec0SHong Zhang         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
2263beae5ec0SHong Zhang         break;
2264be818407SHong Zhang       }
2265be818407SHong Zhang     }
2266be818407SHong Zhang 
2267beae5ec0SHong Zhang     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
2268801fbe65SHong Zhang   }
22699566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
2270cf053153SJunchao Zhang   PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
22719566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
22729566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
22739566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&is_from));
22749566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&is_to));
22759566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
22769566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &array));
227771aed81dSHong Zhang 
2278cf053153SJunchao Zhang   if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
2279cf053153SJunchao Zhang     mumps->id.sol_loc_len = 0;
2280cf053153SJunchao Zhang     PetscCall(PetscFree(mumps->id.sol_loc));
2281cf053153SJunchao Zhang   }
2282cf053153SJunchao Zhang 
2283cf053153SJunchao Zhang   // restore old values
2284cf053153SJunchao Zhang   mumps->id.sol_loc     = sol_loc_save;
2285cf053153SJunchao Zhang   mumps->id.sol_loc_len = sol_loc_len_save;
228671aed81dSHong Zhang   mumps->id.isol_loc    = isol_loc_save;
228771aed81dSHong Zhang 
22889566063dSJacob Faibussowitsch   PetscCall(PetscFree2(sol_loc, isol_loc));
22899566063dSJacob Faibussowitsch   PetscCall(PetscFree(idxx));
22909566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&msol_loc));
22919566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&v_mpi));
2292a6053eceSJunchao Zhang   if (!denseB) {
22932b691707SHong Zhang     if (!mumps->myid) {
2294d56c302dSHong Zhang       b = (Mat_MPIAIJ *)Bt->data;
22959566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
22969566063dSJacob Faibussowitsch       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
229728b400f6SJacob Faibussowitsch       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
22982b691707SHong Zhang     }
22992b691707SHong Zhang   } else {
230025aac85cSJunchao Zhang     if (mumps->ICNTL20 == 0) {
23019566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&b_seq));
23029566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&scat_rhs));
230325aac85cSJunchao Zhang     }
23042b691707SHong Zhang   }
23059566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&scat_sol));
230657508eceSPierre Jolivet   PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
23073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2308e0b74bf9SHong Zhang }
2309e0b74bf9SHong Zhang 
231066976f2fSJacob Faibussowitsch static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2311d71ae5a4SJacob Faibussowitsch {
2312b18964edSHong Zhang   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2313338d3105SPierre Jolivet   const PetscMUMPSInt value = mumps->id.ICNTL(9);
2314b18964edSHong Zhang 
2315b18964edSHong Zhang   PetscFunctionBegin;
2316b18964edSHong Zhang   mumps->id.ICNTL(9) = 0;
2317b18964edSHong Zhang   PetscCall(MatMatSolve_MUMPS(A, B, X));
2318338d3105SPierre Jolivet   mumps->id.ICNTL(9) = value;
23193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2320b18964edSHong Zhang }
2321b18964edSHong Zhang 
232266976f2fSJacob Faibussowitsch static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2323d71ae5a4SJacob Faibussowitsch {
2324eb3ef3b2SHong Zhang   PetscBool flg;
2325eb3ef3b2SHong Zhang   Mat       B;
2326eb3ef3b2SHong Zhang 
2327eb3ef3b2SHong Zhang   PetscFunctionBegin;
23289566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
232928b400f6SJacob Faibussowitsch   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
2330eb3ef3b2SHong Zhang 
2331eb3ef3b2SHong Zhang   /* Create B=Bt^T that uses Bt's data structure */
23329566063dSJacob Faibussowitsch   PetscCall(MatCreateTranspose(Bt, &B));
2333eb3ef3b2SHong Zhang 
23349566063dSJacob Faibussowitsch   PetscCall(MatMatSolve_MUMPS(A, B, X));
23359566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
23363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2337eb3ef3b2SHong Zhang }
2338eb3ef3b2SHong Zhang 
2339ace3df97SHong Zhang #if !defined(PETSC_USE_COMPLEX)
2340a58c3f20SHong Zhang /*
2341a58c3f20SHong Zhang   input:
2342a58c3f20SHong Zhang    F:        numeric factor
2343a58c3f20SHong Zhang   output:
2344a58c3f20SHong Zhang    nneg:     total number of negative pivots
234519d49a3bSHong Zhang    nzero:    total number of zero pivots
234619d49a3bSHong Zhang    npos:     (global dimension of F) - nneg - nzero
2347a58c3f20SHong Zhang */
234866976f2fSJacob Faibussowitsch static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2349d71ae5a4SJacob Faibussowitsch {
2350e69c285eSBarry Smith   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
2351c1490034SHong Zhang   PetscMPIInt size;
2352a58c3f20SHong Zhang 
2353a58c3f20SHong Zhang   PetscFunctionBegin;
23549566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2355bcb30aebSHong Zhang   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2356aed4548fSBarry Smith   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));
2357ed85ac9fSHong Zhang 
2358710ac8efSHong Zhang   if (nneg) *nneg = mumps->id.INFOG(12);
2359ed85ac9fSHong Zhang   if (nzero || npos) {
236008401ef6SPierre Jolivet     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2361710ac8efSHong Zhang     if (nzero) *nzero = mumps->id.INFOG(28);
2362710ac8efSHong Zhang     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2363a58c3f20SHong Zhang   }
23643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2365a58c3f20SHong Zhang }
236619d49a3bSHong Zhang #endif
2367a58c3f20SHong Zhang 
236866976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
2369d71ae5a4SJacob Faibussowitsch {
23706497c311SBarry Smith   PetscMPIInt    nreqs;
2371a6053eceSJunchao Zhang   PetscMUMPSInt *irn, *jcn;
2372a6053eceSJunchao Zhang   PetscMPIInt    count;
23736497c311SBarry Smith   PetscCount     totnnz, remain;
2374a6053eceSJunchao Zhang   const PetscInt osize = mumps->omp_comm_size;
2375a6053eceSJunchao Zhang   PetscScalar   *val;
23763ab56b82SJunchao Zhang 
23773ab56b82SJunchao Zhang   PetscFunctionBegin;
2378a6053eceSJunchao Zhang   if (osize > 1) {
23793ab56b82SJunchao Zhang     if (reuse == MAT_INITIAL_MATRIX) {
23803ab56b82SJunchao Zhang       /* master first gathers counts of nonzeros to receive */
23819566063dSJacob Faibussowitsch       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
23829566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
23833ab56b82SJunchao Zhang 
2384a6053eceSJunchao Zhang       /* Then each computes number of send/recvs */
23853ab56b82SJunchao Zhang       if (mumps->is_omp_master) {
2386a6053eceSJunchao Zhang         /* Start from 1 since self communication is not done in MPI */
2387a6053eceSJunchao Zhang         nreqs = 0;
23886497c311SBarry Smith         for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
2389a6053eceSJunchao Zhang       } else {
23906497c311SBarry Smith         nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
23913ab56b82SJunchao Zhang       }
239235cb6cd3SPierre Jolivet       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
23933ab56b82SJunchao Zhang 
2394a6053eceSJunchao Zhang       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
2395a6053eceSJunchao Zhang          MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
2396a6053eceSJunchao Zhang          might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
2397a6053eceSJunchao Zhang          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
2398a6053eceSJunchao Zhang        */
2399a6053eceSJunchao Zhang       nreqs = 0; /* counter for actual send/recvs */
24003ab56b82SJunchao Zhang       if (mumps->is_omp_master) {
24016497c311SBarry Smith         totnnz = 0;
24026497c311SBarry Smith 
24036497c311SBarry Smith         for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
24049566063dSJacob Faibussowitsch         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
24059566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(totnnz, &val));
2406a6053eceSJunchao Zhang 
2407a6053eceSJunchao Zhang         /* Self communication */
24089566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
24099566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
24109566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
2411a6053eceSJunchao Zhang 
2412a6053eceSJunchao Zhang         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
24139566063dSJacob Faibussowitsch         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
24149566063dSJacob Faibussowitsch         PetscCall(PetscFree(mumps->val_alloc));
2415a6053eceSJunchao Zhang         mumps->nnz = totnnz;
24163ab56b82SJunchao Zhang         mumps->irn = irn;
24173ab56b82SJunchao Zhang         mumps->jcn = jcn;
2418a6053eceSJunchao Zhang         mumps->val = mumps->val_alloc = val;
2419a6053eceSJunchao Zhang 
2420a6053eceSJunchao Zhang         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
2421a6053eceSJunchao Zhang         jcn += mumps->recvcount[0];
2422a6053eceSJunchao Zhang         val += mumps->recvcount[0];
2423a6053eceSJunchao Zhang 
2424a6053eceSJunchao Zhang         /* Remote communication */
24256497c311SBarry Smith         for (PetscMPIInt i = 1; i < osize; i++) {
24266497c311SBarry Smith           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2427a6053eceSJunchao Zhang           remain = mumps->recvcount[i] - count;
2428a6053eceSJunchao Zhang           while (count > 0) {
24296497c311SBarry Smith             PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
24306497c311SBarry Smith             PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
24316497c311SBarry Smith             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2432a6053eceSJunchao Zhang             irn += count;
2433a6053eceSJunchao Zhang             jcn += count;
2434a6053eceSJunchao Zhang             val += count;
24356497c311SBarry Smith             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2436a6053eceSJunchao Zhang             remain -= count;
2437a6053eceSJunchao Zhang           }
24383ab56b82SJunchao Zhang         }
24393ab56b82SJunchao Zhang       } else {
2440a6053eceSJunchao Zhang         irn    = mumps->irn;
2441a6053eceSJunchao Zhang         jcn    = mumps->jcn;
2442a6053eceSJunchao Zhang         val    = mumps->val;
24436497c311SBarry Smith         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2444a6053eceSJunchao Zhang         remain = mumps->nnz - count;
2445a6053eceSJunchao Zhang         while (count > 0) {
24466497c311SBarry Smith           PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
24476497c311SBarry Smith           PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
24486497c311SBarry Smith           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2449a6053eceSJunchao Zhang           irn += count;
2450a6053eceSJunchao Zhang           jcn += count;
2451a6053eceSJunchao Zhang           val += count;
24526497c311SBarry Smith           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2453a6053eceSJunchao Zhang           remain -= count;
24543ab56b82SJunchao Zhang         }
24553ab56b82SJunchao Zhang       }
2456a6053eceSJunchao Zhang     } else {
2457a6053eceSJunchao Zhang       nreqs = 0;
2458a6053eceSJunchao Zhang       if (mumps->is_omp_master) {
2459a6053eceSJunchao Zhang         val = mumps->val + mumps->recvcount[0];
24606497c311SBarry Smith         for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
24616497c311SBarry Smith           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2462a6053eceSJunchao Zhang           remain = mumps->recvcount[i] - count;
2463a6053eceSJunchao Zhang           while (count > 0) {
24646497c311SBarry Smith             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2465a6053eceSJunchao Zhang             val += count;
24666497c311SBarry Smith             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2467a6053eceSJunchao Zhang             remain -= count;
2468a6053eceSJunchao Zhang           }
2469a6053eceSJunchao Zhang         }
2470a6053eceSJunchao Zhang       } else {
2471a6053eceSJunchao Zhang         val    = mumps->val;
24726497c311SBarry Smith         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2473a6053eceSJunchao Zhang         remain = mumps->nnz - count;
2474a6053eceSJunchao Zhang         while (count > 0) {
24756497c311SBarry Smith           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2476a6053eceSJunchao Zhang           val += count;
24776497c311SBarry Smith           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2478a6053eceSJunchao Zhang           remain -= count;
2479a6053eceSJunchao Zhang         }
2480a6053eceSJunchao Zhang       }
2481a6053eceSJunchao Zhang     }
24829566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
2483a6053eceSJunchao Zhang     mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
2484a6053eceSJunchao Zhang   }
24853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24863ab56b82SJunchao Zhang }
24873ab56b82SJunchao Zhang 
2488d2a308c1SPierre Jolivet static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2489d71ae5a4SJacob Faibussowitsch {
249057508eceSPierre Jolivet   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2491397b6df1SKris Buschelman 
2492397b6df1SKris Buschelman   PetscFunctionBegin;
2493dbf6bb8dSprj-   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
249448a46eb9SPierre Jolivet     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
24959566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
24963ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
24972aca8efcSHong Zhang   }
24986baea169SHong Zhang 
24999566063dSJacob Faibussowitsch   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
25009566063dSJacob Faibussowitsch   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2501397b6df1SKris Buschelman 
2502397b6df1SKris Buschelman   /* numerical factorization phase */
2503a5e57a09SHong Zhang   mumps->id.job = JOB_FACTNUMERIC;
25044e34a73bSHong Zhang   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2505cf053153SJunchao Zhang     if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2506397b6df1SKris Buschelman   } else {
2507cf053153SJunchao Zhang     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2508397b6df1SKris Buschelman   }
2509cf053153SJunchao Zhang 
2510cf053153SJunchao Zhang   if (F->schur) {
2511cf053153SJunchao Zhang     const PetscScalar *array;
2512cf053153SJunchao Zhang     MUMPS_INT          size = mumps->id.size_schur;
2513cf053153SJunchao Zhang     PetscCall(MatDenseGetArrayRead(F->schur, &array));
2514cf053153SJunchao Zhang     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2515cf053153SJunchao Zhang     PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2516cf053153SJunchao Zhang   }
2517cf053153SJunchao Zhang 
25183ab56b82SJunchao Zhang   PetscMUMPS_c(mumps);
2519a5e57a09SHong Zhang   if (mumps->id.INFOG(1) < 0) {
25209261f6e4SBarry Smith     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
25219261f6e4SBarry Smith     if (mumps->id.INFOG(1) == -10) {
25229261f6e4SBarry Smith       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2523603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2524c0d63f2fSHong Zhang     } else if (mumps->id.INFOG(1) == -13) {
25259261f6e4SBarry Smith       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2526603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2527c0d63f2fSHong Zhang     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2528bdcd51b8SPierre Jolivet       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2529603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
25302aca8efcSHong Zhang     } else {
25319261f6e4SBarry Smith       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2532603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_OTHER;
2533151787a6SHong Zhang     }
25342aca8efcSHong Zhang   }
25359261f6e4SBarry Smith   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));
2536397b6df1SKris Buschelman 
2537b3cb21ddSStefano Zampini   F->assembled = PETSC_TRUE;
2538d47f36abSHong Zhang 
2539b3cb21ddSStefano Zampini   if (F->schur) { /* reset Schur status to unfactored */
25403cb7dd0eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
2541c70f7ee4SJunchao Zhang     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
25423cb7dd0eSStefano Zampini #endif
2543cf053153SJunchao Zhang     PetscScalar *array;
2544cf053153SJunchao Zhang     PetscCall(MatDenseGetArray(F->schur, &array));
2545cf053153SJunchao Zhang     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2546cf053153SJunchao Zhang     PetscCall(MatDenseRestoreArray(F->schur, &array));
2547b3cb21ddSStefano Zampini     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2548b3cb21ddSStefano Zampini       mumps->id.ICNTL(19) = 2;
25499566063dSJacob Faibussowitsch       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2550b3cb21ddSStefano Zampini     }
25519566063dSJacob Faibussowitsch     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2552b3cb21ddSStefano Zampini   }
255367877ebaSShri Abhyankar 
2554066565c5SStefano Zampini   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2555066565c5SStefano Zampini   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2556066565c5SStefano Zampini 
25573ab56b82SJunchao Zhang   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2558cf053153SJunchao Zhang   // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2559cf053153SJunchao Zhang   // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2560cf053153SJunchao Zhang   // which the solution (in SOL_loc) is available on the local processor.
2561cf053153SJunchao Zhang   // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2562cf053153SJunchao Zhang   // normally have the same contents for each of these calls. The only exception is the case of
2563cf053153SJunchao Zhang   // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non
2564cf053153SJunchao Zhang   // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
25652d4298aeSJunchao Zhang   if (mumps->petsc_size > 1) {
256667877ebaSShri Abhyankar     PetscInt     lsol_loc;
2567cf053153SJunchao Zhang     PetscScalar *array;
2568c2093ab7SHong Zhang 
2569c2093ab7SHong Zhang     /* distributed solution; Create x_seq=sol_loc for repeated use */
2570c2093ab7SHong Zhang     if (mumps->x_seq) {
25719566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2572cf053153SJunchao Zhang       PetscCall(PetscFree(mumps->id.isol_loc));
25739566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&mumps->x_seq));
2574c2093ab7SHong Zhang     }
2575a5e57a09SHong Zhang     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2576cf053153SJunchao Zhang     PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2577cf053153SJunchao Zhang     PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2578cf053153SJunchao Zhang     PetscCall(VecGetArray(mumps->x_seq, &array));
2579cf053153SJunchao Zhang     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2580cf053153SJunchao Zhang     PetscCall(VecRestoreArray(mumps->x_seq, &array));
25816497c311SBarry Smith     mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
258267877ebaSShri Abhyankar   }
2583cf053153SJunchao Zhang   PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
25843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2585397b6df1SKris Buschelman }
2586397b6df1SKris Buschelman 
25879a2535b5SHong Zhang /* Sets MUMPS options from the options database */
258866976f2fSJacob Faibussowitsch static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2589d71ae5a4SJacob Faibussowitsch {
2590e69c285eSBarry Smith   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2591cf053153SJunchao Zhang   PetscReal     cntl;
2592413bcc21SPierre Jolivet   PetscMUMPSInt icntl = 0, size, *listvar_schur;
259345e3843bSPierre Jolivet   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2594cf053153SJunchao Zhang   PetscBool     flg   = PETSC_FALSE;
2595cf053153SJunchao Zhang   PetscBool     schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
2596cf053153SJunchao Zhang   void         *arr;
2597dcd589f8SShri Abhyankar 
2598dcd589f8SShri Abhyankar   PetscFunctionBegin;
259926cc229bSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2600413bcc21SPierre Jolivet   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2601cf053153SJunchao Zhang     PetscPrecision precision  = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
2602413bcc21SPierre Jolivet     PetscInt       nthreads   = 0;
2603413bcc21SPierre Jolivet     PetscInt       nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2604413bcc21SPierre Jolivet     PetscInt       nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
260593d70b8aSPierre Jolivet     PetscMUMPSInt  nblk, *blkvar, *blkptr;
2606413bcc21SPierre Jolivet 
2607413bcc21SPierre Jolivet     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2608413bcc21SPierre Jolivet     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2609413bcc21SPierre Jolivet     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2610413bcc21SPierre Jolivet 
2611413bcc21SPierre Jolivet     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2612413bcc21SPierre Jolivet     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2613413bcc21SPierre Jolivet     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2614413bcc21SPierre Jolivet     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2615413bcc21SPierre Jolivet     if (mumps->use_petsc_omp_support) {
2616413bcc21SPierre Jolivet       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2617413bcc21SPierre Jolivet #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2618413bcc21SPierre Jolivet       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2619413bcc21SPierre Jolivet       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2620ea17275aSJose E. Roman #else
2621ea17275aSJose E. Roman       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2622ea17275aSJose E. Roman               ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2623413bcc21SPierre Jolivet #endif
2624413bcc21SPierre Jolivet     } else {
2625413bcc21SPierre Jolivet       mumps->omp_comm      = PETSC_COMM_SELF;
2626413bcc21SPierre Jolivet       mumps->mumps_comm    = mumps->petsc_comm;
2627413bcc21SPierre Jolivet       mumps->is_omp_master = PETSC_TRUE;
2628413bcc21SPierre Jolivet     }
2629413bcc21SPierre Jolivet     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2630413bcc21SPierre Jolivet     mumps->reqs = NULL;
2631413bcc21SPierre Jolivet     mumps->tag  = 0;
2632413bcc21SPierre Jolivet 
2633413bcc21SPierre Jolivet     if (mumps->mumps_comm != MPI_COMM_NULL) {
2634413bcc21SPierre Jolivet       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2635413bcc21SPierre Jolivet         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2636413bcc21SPierre Jolivet         MPI_Comm comm;
2637413bcc21SPierre Jolivet         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2638413bcc21SPierre Jolivet         mumps->mumps_comm = comm;
2639413bcc21SPierre Jolivet       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2640413bcc21SPierre Jolivet     }
2641413bcc21SPierre Jolivet 
2642413bcc21SPierre Jolivet     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2643413bcc21SPierre Jolivet     mumps->id.job          = JOB_INIT;
2644413bcc21SPierre Jolivet     mumps->id.par          = 1; /* host participates factorizaton and solve */
2645413bcc21SPierre Jolivet     mumps->id.sym          = mumps->sym;
2646413bcc21SPierre Jolivet 
2647413bcc21SPierre Jolivet     size          = mumps->id.size_schur;
2648413bcc21SPierre Jolivet     arr           = mumps->id.schur;
2649413bcc21SPierre Jolivet     listvar_schur = mumps->id.listvar_schur;
265093d70b8aSPierre Jolivet     nblk          = mumps->id.nblk;
265193d70b8aSPierre Jolivet     blkvar        = mumps->id.blkvar;
265293d70b8aSPierre Jolivet     blkptr        = mumps->id.blkptr;
265396eb7ee0SStefano Zampini     if (PetscDefined(USE_DEBUG)) {
265496eb7ee0SStefano Zampini       for (PetscInt i = 0; i < size; i++)
265596eb7ee0SStefano Zampini         PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
265696eb7ee0SStefano Zampini                    A->rmap->N);
265796eb7ee0SStefano Zampini     }
265896eb7ee0SStefano Zampini 
2659cf053153SJunchao Zhang     PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
2660cf053153SJunchao Zhang     PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
2661cf053153SJunchao Zhang     PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar");
2662cf053153SJunchao Zhang     PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));
2663cf053153SJunchao Zhang 
2664413bcc21SPierre Jolivet     PetscMUMPS_c(mumps);
26659261f6e4SBarry Smith     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
266651ad14ebSPierre Jolivet 
266751ad14ebSPierre Jolivet     /* set PETSc-MUMPS default options - override MUMPS default */
266851ad14ebSPierre Jolivet     mumps->id.ICNTL(3) = 0;
266951ad14ebSPierre Jolivet     mumps->id.ICNTL(4) = 0;
267051ad14ebSPierre Jolivet     if (mumps->petsc_size == 1) {
267151ad14ebSPierre Jolivet       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
267251ad14ebSPierre Jolivet       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
267351ad14ebSPierre Jolivet     } else {
267451ad14ebSPierre Jolivet       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
267551ad14ebSPierre Jolivet       mumps->id.ICNTL(21) = 1; /* distributed solution */
267651ad14ebSPierre Jolivet     }
267793d70b8aSPierre Jolivet     if (nblk && blkptr) {
267893d70b8aSPierre Jolivet       mumps->id.ICNTL(15) = 1;
267993d70b8aSPierre Jolivet       mumps->id.nblk      = nblk;
268093d70b8aSPierre Jolivet       mumps->id.blkvar    = blkvar;
268193d70b8aSPierre Jolivet       mumps->id.blkptr    = blkptr;
2682cf053153SJunchao Zhang     } else mumps->id.ICNTL(15) = 0;
268351ad14ebSPierre Jolivet 
2684413bcc21SPierre Jolivet     /* restore cached ICNTL and CNTL values */
2685413bcc21SPierre Jolivet     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2686cf053153SJunchao Zhang     for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);
2687cf053153SJunchao Zhang 
2688413bcc21SPierre Jolivet     PetscCall(PetscFree(mumps->ICNTL_pre));
2689413bcc21SPierre Jolivet     PetscCall(PetscFree(mumps->CNTL_pre));
2690413bcc21SPierre Jolivet 
2691413bcc21SPierre Jolivet     if (schur) {
2692413bcc21SPierre Jolivet       mumps->id.size_schur    = size;
2693413bcc21SPierre Jolivet       mumps->id.schur_lld     = size;
2694413bcc21SPierre Jolivet       mumps->id.schur         = arr;
2695413bcc21SPierre Jolivet       mumps->id.listvar_schur = listvar_schur;
2696413bcc21SPierre Jolivet       if (mumps->petsc_size > 1) {
2697413bcc21SPierre Jolivet         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2698413bcc21SPierre Jolivet 
2699413bcc21SPierre Jolivet         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2700413bcc21SPierre Jolivet         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
27015440e5dcSBarry Smith         PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm));
2702413bcc21SPierre Jolivet         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2703413bcc21SPierre Jolivet       } else {
2704413bcc21SPierre Jolivet         if (F->factortype == MAT_FACTOR_LU) {
2705413bcc21SPierre Jolivet           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2706413bcc21SPierre Jolivet         } else {
2707413bcc21SPierre Jolivet           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2708413bcc21SPierre Jolivet         }
2709413bcc21SPierre Jolivet       }
2710413bcc21SPierre Jolivet       mumps->id.ICNTL(26) = -1;
2711413bcc21SPierre Jolivet     }
2712413bcc21SPierre Jolivet 
2713413bcc21SPierre Jolivet     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2714413bcc21SPierre Jolivet        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2715413bcc21SPierre Jolivet      */
2716413bcc21SPierre Jolivet     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2717cf053153SJunchao Zhang     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));
2718413bcc21SPierre Jolivet 
2719413bcc21SPierre Jolivet     mumps->scat_rhs = NULL;
2720413bcc21SPierre Jolivet     mumps->scat_sol = NULL;
2721413bcc21SPierre Jolivet   }
27229566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
27239a2535b5SHong Zhang   if (flg) mumps->id.ICNTL(1) = icntl;
27249566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
27259a2535b5SHong Zhang   if (flg) mumps->id.ICNTL(2) = icntl;
27269566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
27279a2535b5SHong Zhang   if (flg) mumps->id.ICNTL(3) = icntl;
2728dcd589f8SShri Abhyankar 
27299566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
27309a2535b5SHong Zhang   if (flg) mumps->id.ICNTL(4) = icntl;
27319a2535b5SHong Zhang   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
27329a2535b5SHong Zhang 
27339566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
27349a2535b5SHong Zhang   if (flg) mumps->id.ICNTL(6) = icntl;
27359a2535b5SHong Zhang 
27369566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2737dcd589f8SShri Abhyankar   if (flg) {
2738aed4548fSBarry Smith     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2739b53c1a7fSBarry Smith     mumps->id.ICNTL(7) = icntl;
2740dcd589f8SShri Abhyankar   }
2741e0b74bf9SHong Zhang 
27429566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
27439566063dSJacob Faibussowitsch   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
27449566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
27459566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
27469566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
27479566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
27489566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
274945e3843bSPierre Jolivet   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
27506497c311SBarry Smith   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
275145e3843bSPierre Jolivet   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
275245e3843bSPierre Jolivet   if (flg) {
275393d70b8aSPierre Jolivet     if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
275493d70b8aSPierre Jolivet     else if (mumps->id.ICNTL(15) > 0) {
275593d70b8aSPierre Jolivet       const PetscInt *bsizes;
275693d70b8aSPierre Jolivet       PetscInt        nblocks, p, *blkptr = NULL;
275793d70b8aSPierre Jolivet       PetscMPIInt    *recvcounts, *displs, n;
275893d70b8aSPierre Jolivet       PetscMPIInt     rank, size = 0;
275993d70b8aSPierre Jolivet 
276093d70b8aSPierre Jolivet       PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
276193d70b8aSPierre Jolivet       flg = PETSC_TRUE;
276293d70b8aSPierre Jolivet       for (p = 0; p < nblocks; ++p) {
276393d70b8aSPierre Jolivet         if (bsizes[p] > 1) break;
276493d70b8aSPierre Jolivet       }
276593d70b8aSPierre Jolivet       if (p == nblocks) flg = PETSC_FALSE;
27665440e5dcSBarry Smith       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
276793d70b8aSPierre Jolivet       if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
276893d70b8aSPierre Jolivet         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
276993d70b8aSPierre Jolivet         if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
277093d70b8aSPierre Jolivet         PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
277193d70b8aSPierre Jolivet         PetscCall(PetscMPIIntCast(nblocks, &n));
277293d70b8aSPierre Jolivet         PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
277393d70b8aSPierre Jolivet         for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
277493d70b8aSPierre Jolivet         PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
277593d70b8aSPierre Jolivet         PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
277693d70b8aSPierre Jolivet         PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
277793d70b8aSPierre Jolivet         if (rank == 0) {
277893d70b8aSPierre Jolivet           blkptr[0] = 1;
277993d70b8aSPierre Jolivet           for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p];
278093d70b8aSPierre Jolivet           PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
278193d70b8aSPierre Jolivet         }
278293d70b8aSPierre Jolivet         PetscCall(PetscFree2(recvcounts, displs));
278393d70b8aSPierre Jolivet         PetscCall(PetscFree(blkptr));
278493d70b8aSPierre Jolivet       }
278593d70b8aSPierre Jolivet     }
278645e3843bSPierre Jolivet   }
27879566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
278859ac8732SStefano Zampini   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
27899566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&F->schur));
27909566063dSJacob Faibussowitsch     PetscCall(MatMumpsResetSchur_Private(mumps));
279159ac8732SStefano Zampini   }
279225aac85cSJunchao Zhang 
279343f3b051SJunchao Zhang   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
279443f3b051SJunchao Zhang      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
279525aac85cSJunchao Zhang      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
279643f3b051SJunchao Zhang      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
279743f3b051SJunchao Zhang      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
279841caa250SJunchao Zhang      In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+
279925aac85cSJunchao Zhang    */
2800c183326eSPierre Jolivet   mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2801c183326eSPierre Jolivet #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2802c183326eSPierre Jolivet   mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
280325aac85cSJunchao Zhang #endif
28049566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2805aed4548fSBarry Smith   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
280625aac85cSJunchao Zhang #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2807aed4548fSBarry Smith   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
280825aac85cSJunchao Zhang #endif
28099566063dSJacob Faibussowitsch   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
28109a2535b5SHong Zhang 
28119566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
28129566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
28139566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2814ac530a7eSPierre Jolivet   if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
2815d7ebd59bSHong Zhang 
28169566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
28179566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
28189566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2819fa6fd9d0SPierre Jolivet   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
28209566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
28219566063dSJacob Faibussowitsch   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
28229566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2823145b44c9SPierre Jolivet   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
28249566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
28259566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
28269566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
282750ea2040Saszaboa   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
28289566063dSJacob Faibussowitsch   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2829c92b4f89SPierre Jolivet   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2830*7cd49bdeStom.caruso   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", mumps->id.ICNTL(49), &mumps->id.ICNTL(49), NULL));
283191b026caSPierre Jolivet   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2832146931dbSPierre Jolivet   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2833dcd589f8SShri Abhyankar 
2834cf053153SJunchao Zhang   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
2835cf053153SJunchao Zhang   if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
2836cf053153SJunchao Zhang   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
2837cf053153SJunchao Zhang   if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
2838cf053153SJunchao Zhang   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
2839cf053153SJunchao Zhang   if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
2840cf053153SJunchao Zhang   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
2841cf053153SJunchao Zhang   if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
2842cf053153SJunchao Zhang   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
2843cf053153SJunchao Zhang   if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
2844cf053153SJunchao Zhang   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
2845cf053153SJunchao Zhang   if (flg) ID_CNTL_SET(mumps->id, 7, cntl);
2846e5bb22a1SHong Zhang 
28479566063dSJacob Faibussowitsch   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2848b34f08ffSHong Zhang 
28499566063dSJacob Faibussowitsch   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2850b34f08ffSHong Zhang   if (ninfo) {
285108401ef6SPierre Jolivet     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo);
28529566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2853b34f08ffSHong Zhang     mumps->ninfo = ninfo;
2854b34f08ffSHong Zhang     for (i = 0; i < ninfo; i++) {
2855aed4548fSBarry Smith       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo);
2856f7d195e4SLawrence Mitchell       mumps->info[i] = info[i];
2857b34f08ffSHong Zhang     }
2858b34f08ffSHong Zhang   }
2859d0609cedSBarry Smith   PetscOptionsEnd();
28603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2861dcd589f8SShri Abhyankar }
2862dcd589f8SShri Abhyankar 
2863d2a308c1SPierre Jolivet static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2864d71ae5a4SJacob Faibussowitsch {
28655cd7cf9dSHong Zhang   PetscFunctionBegin;
28665cd7cf9dSHong Zhang   if (mumps->id.INFOG(1) < 0) {
28679261f6e4SBarry Smith     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
28685cd7cf9dSHong Zhang     if (mumps->id.INFOG(1) == -6) {
28699261f6e4SBarry Smith       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2870603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
28715cd7cf9dSHong Zhang     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
28729261f6e4SBarry Smith       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2873603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
28745cd7cf9dSHong Zhang     } else {
28759261f6e4SBarry Smith       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2876603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_OTHER;
28775cd7cf9dSHong Zhang     }
28785cd7cf9dSHong Zhang   }
287972b150d8SStefano Zampini   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
28803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28815cd7cf9dSHong Zhang }
28825cd7cf9dSHong Zhang 
2883d2a308c1SPierre Jolivet static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2884d71ae5a4SJacob Faibussowitsch {
2885e69c285eSBarry Smith   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
288667877ebaSShri Abhyankar   Vec            b;
288767877ebaSShri Abhyankar   const PetscInt M = A->rmap->N;
2888397b6df1SKris Buschelman 
2889397b6df1SKris Buschelman   PetscFunctionBegin;
2890d47f36abSHong Zhang   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2891d47f36abSHong Zhang     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
28923ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2893d47f36abSHong Zhang   }
2894dcd589f8SShri Abhyankar 
28959a2535b5SHong Zhang   /* Set MUMPS options from the options database */
289626cc229bSBarry Smith   PetscCall(MatSetFromOptions_MUMPS(F, A));
2897dcd589f8SShri Abhyankar 
28989566063dSJacob Faibussowitsch   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
28999566063dSJacob Faibussowitsch   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2900dcd589f8SShri Abhyankar 
290167877ebaSShri Abhyankar   /* analysis phase */
2902a5e57a09SHong Zhang   mumps->id.job = JOB_FACTSYMBOLIC;
29036497c311SBarry Smith   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2904a5e57a09SHong Zhang   switch (mumps->id.ICNTL(18)) {
290567877ebaSShri Abhyankar   case 0: /* centralized assembled matrix input */
2906a5e57a09SHong Zhang     if (!mumps->myid) {
2907a6053eceSJunchao Zhang       mumps->id.nnz = mumps->nnz;
2908a6053eceSJunchao Zhang       mumps->id.irn = mumps->irn;
2909a6053eceSJunchao Zhang       mumps->id.jcn = mumps->jcn;
2910cf053153SJunchao Zhang       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
291151ad14ebSPierre Jolivet       if (r && mumps->id.ICNTL(7) == 7) {
29124ac6704cSBarry Smith         mumps->id.ICNTL(7) = 1;
2913a5e57a09SHong Zhang         if (!mumps->myid) {
2914e0b74bf9SHong Zhang           const PetscInt *idx;
2915a6053eceSJunchao Zhang           PetscInt        i;
29162205254eSKarl Rupp 
29179566063dSJacob Faibussowitsch           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
29189566063dSJacob Faibussowitsch           PetscCall(ISGetIndices(r, &idx));
2919f4f49eeaSPierre Jolivet           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
29209566063dSJacob Faibussowitsch           PetscCall(ISRestoreIndices(r, &idx));
2921e0b74bf9SHong Zhang         }
2922e0b74bf9SHong Zhang       }
292367877ebaSShri Abhyankar     }
292467877ebaSShri Abhyankar     break;
292567877ebaSShri Abhyankar   case 3: /* distributed assembled matrix input (size>1) */
2926a6053eceSJunchao Zhang     mumps->id.nnz_loc = mumps->nnz;
2927a6053eceSJunchao Zhang     mumps->id.irn_loc = mumps->irn;
2928a6053eceSJunchao Zhang     mumps->id.jcn_loc = mumps->jcn;
2929cf053153SJunchao Zhang     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
293025aac85cSJunchao Zhang     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
29319566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(A, NULL, &b));
29329566063dSJacob Faibussowitsch       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
29339566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&b));
293425aac85cSJunchao Zhang     }
293567877ebaSShri Abhyankar     break;
293667877ebaSShri Abhyankar   }
29373ab56b82SJunchao Zhang   PetscMUMPS_c(mumps);
29389566063dSJacob Faibussowitsch   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
293967877ebaSShri Abhyankar 
2940719d5645SBarry Smith   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2941dcd589f8SShri Abhyankar   F->ops->solve             = MatSolve_MUMPS;
294251d5961aSHong Zhang   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
29434e34a73bSHong Zhang   F->ops->matsolve          = MatMatSolve_MUMPS;
2944eb3ef3b2SHong Zhang   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2945b18964edSHong Zhang   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2946d47f36abSHong Zhang 
2947d47f36abSHong Zhang   mumps->matstruc = SAME_NONZERO_PATTERN;
29483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2949b24902e0SBarry Smith }
2950b24902e0SBarry Smith 
2951f0b74427SPierre Jolivet /* Note the PETSc r and c permutations are ignored */
2952d2a308c1SPierre Jolivet static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2953d71ae5a4SJacob Faibussowitsch {
2954e69c285eSBarry Smith   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
295567877ebaSShri Abhyankar   Vec            b;
295667877ebaSShri Abhyankar   const PetscInt M = A->rmap->N;
2957450b117fSShri Abhyankar 
2958450b117fSShri Abhyankar   PetscFunctionBegin;
2959d47f36abSHong Zhang   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2960338d3105SPierre Jolivet     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
29613ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2962d47f36abSHong Zhang   }
2963dcd589f8SShri Abhyankar 
29649a2535b5SHong Zhang   /* Set MUMPS options from the options database */
296526cc229bSBarry Smith   PetscCall(MatSetFromOptions_MUMPS(F, A));
2966dcd589f8SShri Abhyankar 
29679566063dSJacob Faibussowitsch   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
29689566063dSJacob Faibussowitsch   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
296967877ebaSShri Abhyankar 
297067877ebaSShri Abhyankar   /* analysis phase */
2971a5e57a09SHong Zhang   mumps->id.job = JOB_FACTSYMBOLIC;
29726497c311SBarry Smith   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2973a5e57a09SHong Zhang   switch (mumps->id.ICNTL(18)) {
297467877ebaSShri Abhyankar   case 0: /* centralized assembled matrix input */
2975a5e57a09SHong Zhang     if (!mumps->myid) {
2976a6053eceSJunchao Zhang       mumps->id.nnz = mumps->nnz;
2977a6053eceSJunchao Zhang       mumps->id.irn = mumps->irn;
2978a6053eceSJunchao Zhang       mumps->id.jcn = mumps->jcn;
2979cf053153SJunchao Zhang       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
298067877ebaSShri Abhyankar     }
298167877ebaSShri Abhyankar     break;
298267877ebaSShri Abhyankar   case 3: /* distributed assembled matrix input (size>1) */
2983a6053eceSJunchao Zhang     mumps->id.nnz_loc = mumps->nnz;
2984a6053eceSJunchao Zhang     mumps->id.irn_loc = mumps->irn;
2985a6053eceSJunchao Zhang     mumps->id.jcn_loc = mumps->jcn;
2986cf053153SJunchao Zhang     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
298725aac85cSJunchao Zhang     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
29889566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(A, NULL, &b));
29899566063dSJacob Faibussowitsch       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
29909566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&b));
299125aac85cSJunchao Zhang     }
299267877ebaSShri Abhyankar     break;
299367877ebaSShri Abhyankar   }
29943ab56b82SJunchao Zhang   PetscMUMPS_c(mumps);
29959566063dSJacob Faibussowitsch   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
299667877ebaSShri Abhyankar 
2997450b117fSShri Abhyankar   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2998dcd589f8SShri Abhyankar   F->ops->solve             = MatSolve_MUMPS;
299951d5961aSHong Zhang   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
3000b18964edSHong Zhang   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
3001d47f36abSHong Zhang 
3002d47f36abSHong Zhang   mumps->matstruc = SAME_NONZERO_PATTERN;
30033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3004450b117fSShri Abhyankar }
3005b24902e0SBarry Smith 
3006f0b74427SPierre Jolivet /* Note the PETSc r permutation and factor info are ignored */
3007d2a308c1SPierre Jolivet static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
3008d71ae5a4SJacob Faibussowitsch {
3009e69c285eSBarry Smith   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
301067877ebaSShri Abhyankar   Vec            b;
301167877ebaSShri Abhyankar   const PetscInt M = A->rmap->N;
3012397b6df1SKris Buschelman 
3013397b6df1SKris Buschelman   PetscFunctionBegin;
3014d47f36abSHong Zhang   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
3015338d3105SPierre Jolivet     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
30163ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
3017d47f36abSHong Zhang   }
3018dcd589f8SShri Abhyankar 
30199a2535b5SHong Zhang   /* Set MUMPS options from the options database */
302026cc229bSBarry Smith   PetscCall(MatSetFromOptions_MUMPS(F, A));
3021dcd589f8SShri Abhyankar 
30229566063dSJacob Faibussowitsch   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
30239566063dSJacob Faibussowitsch   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
3024dcd589f8SShri Abhyankar 
302567877ebaSShri Abhyankar   /* analysis phase */
3026a5e57a09SHong Zhang   mumps->id.job = JOB_FACTSYMBOLIC;
30276497c311SBarry Smith   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
3028a5e57a09SHong Zhang   switch (mumps->id.ICNTL(18)) {
302967877ebaSShri Abhyankar   case 0: /* centralized assembled matrix input */
3030a5e57a09SHong Zhang     if (!mumps->myid) {
3031a6053eceSJunchao Zhang       mumps->id.nnz = mumps->nnz;
3032a6053eceSJunchao Zhang       mumps->id.irn = mumps->irn;
3033a6053eceSJunchao Zhang       mumps->id.jcn = mumps->jcn;
3034cf053153SJunchao Zhang       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
303567877ebaSShri Abhyankar     }
303667877ebaSShri Abhyankar     break;
303767877ebaSShri Abhyankar   case 3: /* distributed assembled matrix input (size>1) */
3038a6053eceSJunchao Zhang     mumps->id.nnz_loc = mumps->nnz;
3039a6053eceSJunchao Zhang     mumps->id.irn_loc = mumps->irn;
3040a6053eceSJunchao Zhang     mumps->id.jcn_loc = mumps->jcn;
3041cf053153SJunchao Zhang     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
304225aac85cSJunchao Zhang     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
30439566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(A, NULL, &b));
30449566063dSJacob Faibussowitsch       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
30459566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&b));
304625aac85cSJunchao Zhang     }
304767877ebaSShri Abhyankar     break;
304867877ebaSShri Abhyankar   }
30493ab56b82SJunchao Zhang   PetscMUMPS_c(mumps);
30509566063dSJacob Faibussowitsch   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
30515cd7cf9dSHong Zhang 
30522792810eSHong Zhang   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
3053dcd589f8SShri Abhyankar   F->ops->solve                 = MatSolve_MUMPS;
305451d5961aSHong Zhang   F->ops->solvetranspose        = MatSolve_MUMPS;
30554e34a73bSHong Zhang   F->ops->matsolve              = MatMatSolve_MUMPS;
305623a5080aSHong Zhang   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
3057b18964edSHong Zhang   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
30584e34a73bSHong Zhang #if defined(PETSC_USE_COMPLEX)
30590298fd71SBarry Smith   F->ops->getinertia = NULL;
30604e34a73bSHong Zhang #else
30614e34a73bSHong Zhang   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
3062db4efbfdSBarry Smith #endif
3063d47f36abSHong Zhang 
3064d47f36abSHong Zhang   mumps->matstruc = SAME_NONZERO_PATTERN;
30653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3066b24902e0SBarry Smith }
3067b24902e0SBarry Smith 
306866976f2fSJacob Faibussowitsch static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
3069d71ae5a4SJacob Faibussowitsch {
30709f196a02SMartin Diehl   PetscBool         isascii;
307164e6c443SBarry Smith   PetscViewerFormat format;
3072e69c285eSBarry Smith   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
3073f6c57405SHong Zhang 
3074f6c57405SHong Zhang   PetscFunctionBegin;
307564e6c443SBarry Smith   /* check if matrix is mumps type */
30763ba16761SJacob Faibussowitsch   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
307764e6c443SBarry Smith 
30789f196a02SMartin Diehl   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
30799f196a02SMartin Diehl   if (isascii) {
30809566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
30811511cd71SPierre Jolivet     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
30829566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
30831511cd71SPierre Jolivet       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
30849566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
30859566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
30869566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
30879566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
30889566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
30899566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
30909566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
30919566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
30929566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
30939566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
30949566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
30959566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
3096a5e57a09SHong Zhang         if (mumps->id.ICNTL(11) > 0) {
3097cf053153SJunchao Zhang           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
3098cf053153SJunchao Zhang           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
3099cf053153SJunchao Zhang           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
3100cf053153SJunchao Zhang           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
3101cf053153SJunchao Zhang           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
3102cf053153SJunchao Zhang           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
3103f6c57405SHong Zhang         }
31049566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
31059566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
31069566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
310745e3843bSPierre Jolivet         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
3108f6c57405SHong Zhang         /* ICNTL(15-17) not used */
31099566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
31109566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
31119566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
31129566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
31139566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
31149566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
3115c0165424SHong Zhang 
31169566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
31179566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
31189566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
31199566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
31209566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
31219566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
312242179a6aSHong Zhang 
31239566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
31249566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
31259566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
31269566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
31279566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
312850ea2040Saszaboa         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks):     %d\n", mumps->id.ICNTL(37)));
31299566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
3130c92b4f89SPierre Jolivet         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
3131*7cd49bdeStom.caruso         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49)));
313291b026caSPierre Jolivet         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
3133146931dbSPierre Jolivet         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));
3134f6c57405SHong Zhang 
3135cf053153SJunchao Zhang         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
3136cf053153SJunchao Zhang         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
3137cf053153SJunchao Zhang         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
3138cf053153SJunchao Zhang         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
3139cf053153SJunchao Zhang         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
3140cf053153SJunchao Zhang         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)ID_CNTL_GET(mumps->id, 7)));
3141f6c57405SHong Zhang 
3142a5b23f4aSJose E. Roman         /* information local to each processor */
31439566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
31449566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
3145cf053153SJunchao Zhang         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
31469566063dSJacob Faibussowitsch         PetscCall(PetscViewerFlush(viewer));
31479566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
3148cf053153SJunchao Zhang         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
31499566063dSJacob Faibussowitsch         PetscCall(PetscViewerFlush(viewer));
31509566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
3151cf053153SJunchao Zhang         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
31529566063dSJacob Faibussowitsch         PetscCall(PetscViewerFlush(viewer));
3153f6c57405SHong Zhang 
31549566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
31559566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
31569566063dSJacob Faibussowitsch         PetscCall(PetscViewerFlush(viewer));
3157f6c57405SHong Zhang 
31589566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
31599566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
31609566063dSJacob Faibussowitsch         PetscCall(PetscViewerFlush(viewer));
3161f6c57405SHong Zhang 
31629566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
31639566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
31649566063dSJacob Faibussowitsch         PetscCall(PetscViewerFlush(viewer));
3165b34f08ffSHong Zhang 
3166a0e18203SThibaut Appel         if (mumps->ninfo && mumps->ninfo <= 80) {
3167b34f08ffSHong Zhang           PetscInt i;
3168b34f08ffSHong Zhang           for (i = 0; i < mumps->ninfo; i++) {
31699566063dSJacob Faibussowitsch             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
31709566063dSJacob Faibussowitsch             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
31719566063dSJacob Faibussowitsch             PetscCall(PetscViewerFlush(viewer));
3172b34f08ffSHong Zhang           }
3173b34f08ffSHong Zhang         }
31749566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
31751511cd71SPierre Jolivet       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
3176f6c57405SHong Zhang 
31771511cd71SPierre Jolivet       if (mumps->myid == 0) { /* information from the host */
3178cf053153SJunchao Zhang         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
3179cf053153SJunchao Zhang         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
3180cf053153SJunchao Zhang         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
3181cf053153SJunchao Zhang         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));
3182f6c57405SHong Zhang 
31839566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
31849566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
31859566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
31869566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
31879566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
31889566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
31899566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
31909566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
31919566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
31929566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
31939566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
31949566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
31959566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
31969566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
31979566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
31989566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
31999566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
32009566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
32019566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
32029566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
32039566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
32049566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
32059566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
32069566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
32079566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
32089566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
32099566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
32109566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
32119566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
32129566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
32139566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
32149566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
32159566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
32169566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
3217f6c57405SHong Zhang       }
3218f6c57405SHong Zhang     }
3219cb828f0fSHong Zhang   }
32203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3221f6c57405SHong Zhang }
3222f6c57405SHong Zhang 
3223d2a308c1SPierre Jolivet static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3224d71ae5a4SJacob Faibussowitsch {
3225e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
322635bd34faSBarry Smith 
322735bd34faSBarry Smith   PetscFunctionBegin;
322835bd34faSBarry Smith   info->block_size        = 1.0;
322964412097SPierre Jolivet   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
323064412097SPierre Jolivet   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
323135bd34faSBarry Smith   info->nz_unneeded       = 0.0;
323235bd34faSBarry Smith   info->assemblies        = 0.0;
323335bd34faSBarry Smith   info->mallocs           = 0.0;
323435bd34faSBarry Smith   info->memory            = 0.0;
323535bd34faSBarry Smith   info->fill_ratio_given  = 0;
323635bd34faSBarry Smith   info->fill_ratio_needed = 0;
323735bd34faSBarry Smith   info->factor_mallocs    = 0;
32383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
323935bd34faSBarry Smith }
324035bd34faSBarry Smith 
324166976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
3242d71ae5a4SJacob Faibussowitsch {
3243e69c285eSBarry Smith   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
3244a3d589ffSStefano Zampini   const PetscScalar *arr;
32458e7ba810SStefano Zampini   const PetscInt    *idxs;
32468e7ba810SStefano Zampini   PetscInt           size, i;
32476444a565SStefano Zampini 
32486444a565SStefano Zampini   PetscFunctionBegin;
32499566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(is, &size));
3250b3cb21ddSStefano Zampini   /* Schur complement matrix */
32519566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&F->schur));
32529566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
32539566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
3254cf053153SJunchao Zhang   // don't allocate mumps->id.schur[] now as its precision is yet to know
32556497c311SBarry Smith   PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
32566497c311SBarry Smith   PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
32579566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
325848a46eb9SPierre Jolivet   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
3259b3cb21ddSStefano Zampini 
3260b3cb21ddSStefano Zampini   /* MUMPS expects Fortran style indices */
32619566063dSJacob Faibussowitsch   PetscCall(PetscFree(mumps->id.listvar_schur));
32629566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
32639566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(is, &idxs));
3264f4f49eeaSPierre Jolivet   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
32659566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(is, &idxs));
326659ac8732SStefano Zampini   /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */
3267cf053153SJunchao Zhang   if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
3268cf053153SJunchao Zhang   else mumps->ICNTL26 = -1;
32693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
32706444a565SStefano Zampini }
327159ac8732SStefano Zampini 
327266976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
3273d71ae5a4SJacob Faibussowitsch {
32746444a565SStefano Zampini   Mat          St;
3275e69c285eSBarry Smith   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
32766444a565SStefano Zampini   PetscScalar *array;
3277cf053153SJunchao Zhang   PetscInt     i, j, N = mumps->id.size_schur;
32786444a565SStefano Zampini 
32796444a565SStefano Zampini   PetscFunctionBegin;
32809261f6e4SBarry Smith   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
32819566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
32829566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
32839566063dSJacob Faibussowitsch   PetscCall(MatSetType(St, MATDENSE));
32849566063dSJacob Faibussowitsch   PetscCall(MatSetUp(St));
32859566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(St, &array));
328659ac8732SStefano Zampini   if (!mumps->sym) {                /* MUMPS always return a full matrix */
32876444a565SStefano Zampini     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
32886444a565SStefano Zampini       for (i = 0; i < N; i++) {
3289cf053153SJunchao Zhang         for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
32906444a565SStefano Zampini       }
32916444a565SStefano Zampini     } else { /* stored by columns */
3292cf053153SJunchao Zhang       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
32936444a565SStefano Zampini     }
32946444a565SStefano Zampini   } else {                          /* either full or lower-triangular (not packed) */
32956444a565SStefano Zampini     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
32966444a565SStefano Zampini       for (i = 0; i < N; i++) {
3297cf053153SJunchao Zhang         for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
32986444a565SStefano Zampini       }
32996444a565SStefano Zampini     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
3300cf053153SJunchao Zhang       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
33016444a565SStefano Zampini     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
33026444a565SStefano Zampini       for (i = 0; i < N; i++) {
3303cf053153SJunchao Zhang         for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
33046444a565SStefano Zampini       }
33056444a565SStefano Zampini     }
33066444a565SStefano Zampini   }
33079566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(St, &array));
33086444a565SStefano Zampini   *S = St;
33093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
33106444a565SStefano Zampini }
33116444a565SStefano Zampini 
331266976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
3313d71ae5a4SJacob Faibussowitsch {
3314e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
33155ccb76cbSHong Zhang 
33165ccb76cbSHong Zhang   PetscFunctionBegin;
3317413bcc21SPierre Jolivet   if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
33186497c311SBarry Smith     PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
33199371c9d4SSatish Balay     for (i = 0; i < nICNTL_pre; ++i)
33209371c9d4SSatish Balay       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
3321413bcc21SPierre Jolivet     if (i == nICNTL_pre) {                             /* not already cached */
3322413bcc21SPierre Jolivet       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
3323413bcc21SPierre Jolivet       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
3324413bcc21SPierre Jolivet       mumps->ICNTL_pre[0]++;
3325413bcc21SPierre Jolivet     }
33266497c311SBarry Smith     mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
3327413bcc21SPierre Jolivet     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
3328413bcc21SPierre Jolivet   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
33293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
33305ccb76cbSHong Zhang }
33315ccb76cbSHong Zhang 
333266976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3333d71ae5a4SJacob Faibussowitsch {
3334e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3335bc6112feSHong Zhang 
3336bc6112feSHong Zhang   PetscFunctionBegin;
333736df9881Sjeremy theler   if (mumps->id.job == JOB_NULL) {
333836df9881Sjeremy theler     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
333936df9881Sjeremy theler     *ival = 0;
334036df9881Sjeremy theler     for (i = 0; i < nICNTL_pre; ++i) {
334136df9881Sjeremy theler       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
334236df9881Sjeremy theler     }
334336df9881Sjeremy theler   } else *ival = mumps->id.ICNTL(icntl);
33443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3345bc6112feSHong Zhang }
3346bc6112feSHong Zhang 
33475ccb76cbSHong Zhang /*@
33481d27aa22SBarry Smith   MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
33495ccb76cbSHong Zhang 
3350c3339decSBarry Smith   Logically Collective
33515ccb76cbSHong Zhang 
33525ccb76cbSHong Zhang   Input Parameters:
33530b4b7b1cSBarry Smith + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
335479578405SBarry Smith . icntl - index of MUMPS parameter array `ICNTL()`
335579578405SBarry Smith - ival  - value of MUMPS `ICNTL(icntl)`
33565ccb76cbSHong Zhang 
33573c7db156SBarry Smith   Options Database Key:
335879578405SBarry Smith . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`
33595ccb76cbSHong Zhang 
33605ccb76cbSHong Zhang   Level: beginner
33615ccb76cbSHong Zhang 
336279578405SBarry Smith   Note:
336379578405SBarry Smith   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
336479578405SBarry Smith 
33651cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
33665ccb76cbSHong Zhang @*/
3367d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
3368d71ae5a4SJacob Faibussowitsch {
33695ccb76cbSHong Zhang   PetscFunctionBegin;
33702989dfd4SHong Zhang   PetscValidType(F, 1);
337128b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
33725ccb76cbSHong Zhang   PetscValidLogicalCollectiveInt(F, icntl, 2);
33735ccb76cbSHong Zhang   PetscValidLogicalCollectiveInt(F, ival, 3);
3374*7cd49bdeStom.caruso   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3375cac4c232SBarry Smith   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
33763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
33775ccb76cbSHong Zhang }
33785ccb76cbSHong Zhang 
3379a21f80fcSHong Zhang /*@
33801d27aa22SBarry Smith   MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3381a21f80fcSHong Zhang 
3382c3339decSBarry Smith   Logically Collective
3383a21f80fcSHong Zhang 
3384a21f80fcSHong Zhang   Input Parameters:
33850b4b7b1cSBarry Smith + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3386a21f80fcSHong Zhang - icntl - index of MUMPS parameter array ICNTL()
3387a21f80fcSHong Zhang 
3388a21f80fcSHong Zhang   Output Parameter:
3389a21f80fcSHong Zhang . ival - value of MUMPS ICNTL(icntl)
3390a21f80fcSHong Zhang 
3391a21f80fcSHong Zhang   Level: beginner
3392a21f80fcSHong Zhang 
33931cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3394a21f80fcSHong Zhang @*/
3395d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
3396d71ae5a4SJacob Faibussowitsch {
3397bc6112feSHong Zhang   PetscFunctionBegin;
33982989dfd4SHong Zhang   PetscValidType(F, 1);
339928b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3400bc6112feSHong Zhang   PetscValidLogicalCollectiveInt(F, icntl, 2);
34014f572ea9SToby Isaac   PetscAssertPointer(ival, 3);
3402*7cd49bdeStom.caruso   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3403cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
34043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3405bc6112feSHong Zhang }
3406bc6112feSHong Zhang 
340766976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
3408d71ae5a4SJacob Faibussowitsch {
3409e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
34108928b65cSHong Zhang 
34118928b65cSHong Zhang   PetscFunctionBegin;
3412413bcc21SPierre Jolivet   if (mumps->id.job == JOB_NULL) {
3413413bcc21SPierre Jolivet     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
34149371c9d4SSatish Balay     for (i = 0; i < nCNTL_pre; ++i)
34159371c9d4SSatish Balay       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
3416413bcc21SPierre Jolivet     if (i == nCNTL_pre) {
3417413bcc21SPierre Jolivet       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
3418413bcc21SPierre Jolivet       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
3419413bcc21SPierre Jolivet       mumps->CNTL_pre[0]++;
3420413bcc21SPierre Jolivet     }
3421413bcc21SPierre Jolivet     mumps->CNTL_pre[1 + 2 * i] = icntl;
3422413bcc21SPierre Jolivet     mumps->CNTL_pre[2 + 2 * i] = val;
3423cf053153SJunchao Zhang   } else ID_CNTL_SET(mumps->id, icntl, val);
34243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
34258928b65cSHong Zhang }
34268928b65cSHong Zhang 
342766976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3428d71ae5a4SJacob Faibussowitsch {
3429e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3430bc6112feSHong Zhang 
3431bc6112feSHong Zhang   PetscFunctionBegin;
343236df9881Sjeremy theler   if (mumps->id.job == JOB_NULL) {
343336df9881Sjeremy theler     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
343436df9881Sjeremy theler     *val = 0.0;
343536df9881Sjeremy theler     for (i = 0; i < nCNTL_pre; ++i) {
343636df9881Sjeremy theler       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
343736df9881Sjeremy theler     }
3438cf053153SJunchao Zhang   } else *val = ID_CNTL_GET(mumps->id, icntl);
34393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3440bc6112feSHong Zhang }
3441bc6112feSHong Zhang 
34428928b65cSHong Zhang /*@
34431d27aa22SBarry Smith   MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
34448928b65cSHong Zhang 
3445c3339decSBarry Smith   Logically Collective
34468928b65cSHong Zhang 
34478928b65cSHong Zhang   Input Parameters:
34480b4b7b1cSBarry Smith + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
344979578405SBarry Smith . icntl - index of MUMPS parameter array `CNTL()`
345079578405SBarry Smith - val   - value of MUMPS `CNTL(icntl)`
34518928b65cSHong Zhang 
34523c7db156SBarry Smith   Options Database Key:
3453147403d9SBarry Smith . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival
34548928b65cSHong Zhang 
34558928b65cSHong Zhang   Level: beginner
34568928b65cSHong Zhang 
345779578405SBarry Smith   Note:
345879578405SBarry Smith   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
345979578405SBarry Smith 
34601cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
34618928b65cSHong Zhang @*/
3462d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
3463d71ae5a4SJacob Faibussowitsch {
34648928b65cSHong Zhang   PetscFunctionBegin;
34652989dfd4SHong Zhang   PetscValidType(F, 1);
346628b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
34678928b65cSHong Zhang   PetscValidLogicalCollectiveInt(F, icntl, 2);
3468bc6112feSHong Zhang   PetscValidLogicalCollectiveReal(F, val, 3);
3469413bcc21SPierre Jolivet   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3470cac4c232SBarry Smith   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
34713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
34728928b65cSHong Zhang }
34738928b65cSHong Zhang 
3474a21f80fcSHong Zhang /*@
34751d27aa22SBarry Smith   MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3476a21f80fcSHong Zhang 
3477c3339decSBarry Smith   Logically Collective
3478a21f80fcSHong Zhang 
3479a21f80fcSHong Zhang   Input Parameters:
34800b4b7b1cSBarry Smith + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3481a21f80fcSHong Zhang - icntl - index of MUMPS parameter array CNTL()
3482a21f80fcSHong Zhang 
3483a21f80fcSHong Zhang   Output Parameter:
3484a21f80fcSHong Zhang . val - value of MUMPS CNTL(icntl)
3485a21f80fcSHong Zhang 
3486a21f80fcSHong Zhang   Level: beginner
3487a21f80fcSHong Zhang 
34881cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3489a21f80fcSHong Zhang @*/
3490d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
3491d71ae5a4SJacob Faibussowitsch {
3492bc6112feSHong Zhang   PetscFunctionBegin;
34932989dfd4SHong Zhang   PetscValidType(F, 1);
349428b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3495bc6112feSHong Zhang   PetscValidLogicalCollectiveInt(F, icntl, 2);
34964f572ea9SToby Isaac   PetscAssertPointer(val, 3);
3497413bcc21SPierre Jolivet   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3498cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
34993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3500bc6112feSHong Zhang }
3501bc6112feSHong Zhang 
350266976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3503d71ae5a4SJacob Faibussowitsch {
3504e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3505bc6112feSHong Zhang 
3506bc6112feSHong Zhang   PetscFunctionBegin;
3507bc6112feSHong Zhang   *info = mumps->id.INFO(icntl);
35083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3509bc6112feSHong Zhang }
3510bc6112feSHong Zhang 
351166976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3512d71ae5a4SJacob Faibussowitsch {
3513e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3514bc6112feSHong Zhang 
3515bc6112feSHong Zhang   PetscFunctionBegin;
3516bc6112feSHong Zhang   *infog = mumps->id.INFOG(icntl);
35173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3518bc6112feSHong Zhang }
3519bc6112feSHong Zhang 
352066976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3521d71ae5a4SJacob Faibussowitsch {
3522e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3523bc6112feSHong Zhang 
3524bc6112feSHong Zhang   PetscFunctionBegin;
3525cf053153SJunchao Zhang   *rinfo = ID_RINFO_GET(mumps->id, icntl);
35263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3527bc6112feSHong Zhang }
3528bc6112feSHong Zhang 
352966976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3530d71ae5a4SJacob Faibussowitsch {
3531e69c285eSBarry Smith   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3532bc6112feSHong Zhang 
3533bc6112feSHong Zhang   PetscFunctionBegin;
3534cf053153SJunchao Zhang   *rinfog = ID_RINFOG_GET(mumps->id, icntl);
35353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3536bc6112feSHong Zhang }
3537bc6112feSHong Zhang 
353866976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
35395c0bae8cSAshish Patel {
35405c0bae8cSAshish Patel   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
35415c0bae8cSAshish Patel 
35425c0bae8cSAshish Patel   PetscFunctionBegin;
35435c0bae8cSAshish Patel   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
35445c0bae8cSAshish Patel   *size  = 0;
35455c0bae8cSAshish Patel   *array = NULL;
35465c0bae8cSAshish Patel   if (!mumps->myid) {
35475c0bae8cSAshish Patel     *size = mumps->id.INFOG(28);
35485c0bae8cSAshish Patel     PetscCall(PetscMalloc1(*size, array));
35495c0bae8cSAshish Patel     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
35505c0bae8cSAshish Patel   }
35515c0bae8cSAshish Patel   PetscFunctionReturn(PETSC_SUCCESS);
35525c0bae8cSAshish Patel }
35535c0bae8cSAshish Patel 
355466976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
3555d71ae5a4SJacob Faibussowitsch {
35560e6b8875SHong Zhang   Mat          Bt = NULL, Btseq = NULL;
35570e6b8875SHong Zhang   PetscBool    flg;
3558bb599dfdSHong Zhang   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3559bb599dfdSHong Zhang   PetscScalar *aa;
3560f410b75aSHong Zhang   PetscInt     spnr, *ia, *ja, M, nrhs;
3561bb599dfdSHong Zhang 
3562bb599dfdSHong Zhang   PetscFunctionBegin;
35634f572ea9SToby Isaac   PetscAssertPointer(spRHS, 2);
3564013e2dc7SBarry Smith   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
356553587d93SPierre Jolivet   PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");
356653587d93SPierre Jolivet   PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
35679566063dSJacob Faibussowitsch   PetscCall(MatTransposeGetMat(spRHS, &Bt));
3568bb599dfdSHong Zhang 
35699566063dSJacob Faibussowitsch   PetscCall(MatMumpsSetIcntl(F, 30, 1));
3570bb599dfdSHong Zhang 
35712d4298aeSJunchao Zhang   if (mumps->petsc_size > 1) {
35720e6b8875SHong Zhang     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
35730e6b8875SHong Zhang     Btseq         = b->A;
35740e6b8875SHong Zhang   } else {
35750e6b8875SHong Zhang     Btseq = Bt;
35760e6b8875SHong Zhang   }
35770e6b8875SHong Zhang 
35789566063dSJacob Faibussowitsch   PetscCall(MatGetSize(spRHS, &M, &nrhs));
35796497c311SBarry Smith   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
35806497c311SBarry Smith   PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
3581f410b75aSHong Zhang   mumps->id.rhs = NULL;
3582f410b75aSHong Zhang 
3583e3f2db6aSHong Zhang   if (!mumps->myid) {
35849566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
35859566063dSJacob Faibussowitsch     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
358628b400f6SJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
35879566063dSJacob Faibussowitsch     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3588cf053153SJunchao Zhang     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
3589e3f2db6aSHong Zhang   } else {
3590e3f2db6aSHong Zhang     mumps->id.irhs_ptr    = NULL;
3591e3f2db6aSHong Zhang     mumps->id.irhs_sparse = NULL;
3592e3f2db6aSHong Zhang     mumps->id.nz_rhs      = 0;
3593cf053153SJunchao Zhang     if (mumps->id.rhs_sparse_len) {
3594cf053153SJunchao Zhang       PetscCall(PetscFree(mumps->id.rhs_sparse));
3595cf053153SJunchao Zhang       mumps->id.rhs_sparse_len = 0;
3596cf053153SJunchao Zhang     }
3597e3f2db6aSHong Zhang   }
3598bb599dfdSHong Zhang   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3599e3f2db6aSHong Zhang   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
3600bb599dfdSHong Zhang 
3601bb599dfdSHong Zhang   /* solve phase */
3602bb599dfdSHong Zhang   mumps->id.job = JOB_SOLVE;
36033ab56b82SJunchao Zhang   PetscMUMPS_c(mumps);
36049261f6e4SBarry Smith   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
360514267174SHong Zhang 
3606e3f2db6aSHong Zhang   if (!mumps->myid) {
36079566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
36089566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
360928b400f6SJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3610e3f2db6aSHong Zhang   }
36113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3612bb599dfdSHong Zhang }
3613bb599dfdSHong Zhang 
3614bb599dfdSHong Zhang /*@
36151d27aa22SBarry Smith   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>
3616bb599dfdSHong Zhang 
3617c3339decSBarry Smith   Logically Collective
3618bb599dfdSHong Zhang 
361920f4b53cSBarry Smith   Input Parameter:
36200b4b7b1cSBarry Smith . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3621bb599dfdSHong Zhang 
3622bb599dfdSHong Zhang   Output Parameter:
362320f4b53cSBarry Smith . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
3624bb599dfdSHong Zhang 
3625bb599dfdSHong Zhang   Level: beginner
3626bb599dfdSHong Zhang 
36271cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3628bb599dfdSHong Zhang @*/
3629d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3630d71ae5a4SJacob Faibussowitsch {
3631bb599dfdSHong Zhang   PetscFunctionBegin;
3632bb599dfdSHong Zhang   PetscValidType(F, 1);
363328b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3634cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
36353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3636bb599dfdSHong Zhang }
3637bb599dfdSHong Zhang 
363866976f2fSJacob Faibussowitsch static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3639d71ae5a4SJacob Faibussowitsch {
36400e6b8875SHong Zhang   Mat spRHS;
36410e6b8875SHong Zhang 
36420e6b8875SHong Zhang   PetscFunctionBegin;
36439566063dSJacob Faibussowitsch   PetscCall(MatCreateTranspose(spRHST, &spRHS));
36449566063dSJacob Faibussowitsch   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
36459566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&spRHS));
36463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
36470e6b8875SHong Zhang }
36480e6b8875SHong Zhang 
36490e6b8875SHong Zhang /*@
36501d27aa22SBarry Smith   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc>
36510e6b8875SHong Zhang 
3652c3339decSBarry Smith   Logically Collective
36530e6b8875SHong Zhang 
365420f4b53cSBarry Smith   Input Parameter:
36550b4b7b1cSBarry Smith . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
36560e6b8875SHong Zhang 
36570e6b8875SHong Zhang   Output Parameter:
365820f4b53cSBarry Smith . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
36590e6b8875SHong Zhang 
36600e6b8875SHong Zhang   Level: beginner
36610e6b8875SHong Zhang 
36621cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
36630e6b8875SHong Zhang @*/
3664d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3665d71ae5a4SJacob Faibussowitsch {
36660e6b8875SHong Zhang   PetscBool flg;
36670e6b8875SHong Zhang 
36680e6b8875SHong Zhang   PetscFunctionBegin;
36690e6b8875SHong Zhang   PetscValidType(F, 1);
367028b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
36719566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
367228b400f6SJacob Faibussowitsch   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
3673cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
36743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
36750e6b8875SHong Zhang }
36760e6b8875SHong Zhang 
367793d70b8aSPierre Jolivet static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
367893d70b8aSPierre Jolivet {
367993d70b8aSPierre Jolivet   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
368093d70b8aSPierre Jolivet 
368193d70b8aSPierre Jolivet   PetscFunctionBegin;
368293d70b8aSPierre Jolivet   if (nblk) {
368393d70b8aSPierre Jolivet     PetscAssertPointer(blkptr, 4);
368493d70b8aSPierre Jolivet     PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
368593d70b8aSPierre Jolivet     PetscCall(PetscFree(mumps->id.blkptr));
368693d70b8aSPierre Jolivet     PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
368793d70b8aSPierre Jolivet     for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
3688cf053153SJunchao Zhang     // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15).
3689cf053153SJunchao Zhang     // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
3690cf053153SJunchao Zhang     // mumps->id.ICNTL(15) = 1;
369193d70b8aSPierre Jolivet     if (blkvar) {
369293d70b8aSPierre Jolivet       PetscCall(PetscFree(mumps->id.blkvar));
369393d70b8aSPierre Jolivet       PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
369493d70b8aSPierre Jolivet       for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
369593d70b8aSPierre Jolivet     }
369693d70b8aSPierre Jolivet   } else {
369793d70b8aSPierre Jolivet     PetscCall(PetscFree(mumps->id.blkptr));
369893d70b8aSPierre Jolivet     PetscCall(PetscFree(mumps->id.blkvar));
3699cf053153SJunchao Zhang     // mumps->id.ICNTL(15) = 0;
3700cf053153SJunchao Zhang     mumps->id.nblk = 0;
370193d70b8aSPierre Jolivet   }
370293d70b8aSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
370393d70b8aSPierre Jolivet }
370493d70b8aSPierre Jolivet 
370593d70b8aSPierre Jolivet /*@
370693d70b8aSPierre Jolivet   MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1`
370793d70b8aSPierre Jolivet 
370893d70b8aSPierre Jolivet   Not collective, only relevant on the first process of the MPI communicator
370993d70b8aSPierre Jolivet 
371093d70b8aSPierre Jolivet   Input Parameters:
371193d70b8aSPierre Jolivet + F      - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
371293d70b8aSPierre Jolivet . nblk   - the number of blocks
371393d70b8aSPierre Jolivet . blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk`
371493d70b8aSPierre Jolivet - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks
371593d70b8aSPierre Jolivet 
371693d70b8aSPierre Jolivet   Level: advanced
371793d70b8aSPierre Jolivet 
371893d70b8aSPierre Jolivet .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()`
371993d70b8aSPierre Jolivet @*/
372093d70b8aSPierre Jolivet PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
372193d70b8aSPierre Jolivet {
372293d70b8aSPierre Jolivet   PetscFunctionBegin;
372393d70b8aSPierre Jolivet   PetscValidType(F, 1);
372493d70b8aSPierre Jolivet   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
372593d70b8aSPierre Jolivet   PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr));
372693d70b8aSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
372793d70b8aSPierre Jolivet }
372893d70b8aSPierre Jolivet 
3729a21f80fcSHong Zhang /*@
37301d27aa22SBarry Smith   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>
3731a21f80fcSHong Zhang 
3732c3339decSBarry Smith   Logically Collective
3733a21f80fcSHong Zhang 
3734a21f80fcSHong Zhang   Input Parameters:
37350b4b7b1cSBarry Smith + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3736a21f80fcSHong Zhang - icntl - index of MUMPS parameter array INFO()
3737a21f80fcSHong Zhang 
3738a21f80fcSHong Zhang   Output Parameter:
3739a21f80fcSHong Zhang . ival - value of MUMPS INFO(icntl)
3740a21f80fcSHong Zhang 
3741a21f80fcSHong Zhang   Level: beginner
3742a21f80fcSHong Zhang 
37431cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3744a21f80fcSHong Zhang @*/
3745d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3746d71ae5a4SJacob Faibussowitsch {
3747bc6112feSHong Zhang   PetscFunctionBegin;
37482989dfd4SHong Zhang   PetscValidType(F, 1);
374928b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
37504f572ea9SToby Isaac   PetscAssertPointer(ival, 3);
3751cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
37523ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3753bc6112feSHong Zhang }
3754bc6112feSHong Zhang 
3755a21f80fcSHong Zhang /*@
37561d27aa22SBarry Smith   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>
3757a21f80fcSHong Zhang 
3758c3339decSBarry Smith   Logically Collective
3759a21f80fcSHong Zhang 
3760a21f80fcSHong Zhang   Input Parameters:
37610b4b7b1cSBarry Smith + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3762a21f80fcSHong Zhang - icntl - index of MUMPS parameter array INFOG()
3763a21f80fcSHong Zhang 
3764a21f80fcSHong Zhang   Output Parameter:
3765a21f80fcSHong Zhang . ival - value of MUMPS INFOG(icntl)
3766a21f80fcSHong Zhang 
3767a21f80fcSHong Zhang   Level: beginner
3768a21f80fcSHong Zhang 
37691cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3770a21f80fcSHong Zhang @*/
3771d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3772d71ae5a4SJacob Faibussowitsch {
3773bc6112feSHong Zhang   PetscFunctionBegin;
37742989dfd4SHong Zhang   PetscValidType(F, 1);
377528b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
37764f572ea9SToby Isaac   PetscAssertPointer(ival, 3);
3777cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
37783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3779bc6112feSHong Zhang }
3780bc6112feSHong Zhang 
3781a21f80fcSHong Zhang /*@
37821d27aa22SBarry Smith   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>
3783a21f80fcSHong Zhang 
3784c3339decSBarry Smith   Logically Collective
3785a21f80fcSHong Zhang 
3786a21f80fcSHong Zhang   Input Parameters:
37870b4b7b1cSBarry Smith + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3788a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFO()
3789a21f80fcSHong Zhang 
3790a21f80fcSHong Zhang   Output Parameter:
3791a21f80fcSHong Zhang . val - value of MUMPS RINFO(icntl)
3792a21f80fcSHong Zhang 
3793a21f80fcSHong Zhang   Level: beginner
3794a21f80fcSHong Zhang 
37951cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3796a21f80fcSHong Zhang @*/
3797d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3798d71ae5a4SJacob Faibussowitsch {
3799bc6112feSHong Zhang   PetscFunctionBegin;
38002989dfd4SHong Zhang   PetscValidType(F, 1);
380128b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
38024f572ea9SToby Isaac   PetscAssertPointer(val, 3);
3803cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
38043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3805bc6112feSHong Zhang }
3806bc6112feSHong Zhang 
3807a21f80fcSHong Zhang /*@
38081d27aa22SBarry Smith   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>
3809a21f80fcSHong Zhang 
3810c3339decSBarry Smith   Logically Collective
3811a21f80fcSHong Zhang 
3812a21f80fcSHong Zhang   Input Parameters:
38130b4b7b1cSBarry Smith + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3814a21f80fcSHong Zhang - icntl - index of MUMPS parameter array RINFOG()
3815a21f80fcSHong Zhang 
3816a21f80fcSHong Zhang   Output Parameter:
3817a21f80fcSHong Zhang . val - value of MUMPS RINFOG(icntl)
3818a21f80fcSHong Zhang 
3819a21f80fcSHong Zhang   Level: beginner
3820a21f80fcSHong Zhang 
38211cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3822a21f80fcSHong Zhang @*/
3823d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3824d71ae5a4SJacob Faibussowitsch {
3825bc6112feSHong Zhang   PetscFunctionBegin;
38262989dfd4SHong Zhang   PetscValidType(F, 1);
382728b400f6SJacob Faibussowitsch   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
38284f572ea9SToby Isaac   PetscAssertPointer(val, 3);
3829cac4c232SBarry Smith   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
38303ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3831bc6112feSHong Zhang }
3832bc6112feSHong Zhang 
38335c0bae8cSAshish Patel /*@
38341d27aa22SBarry Smith   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>
38355c0bae8cSAshish Patel 
38365c0bae8cSAshish Patel   Logically Collective
38375c0bae8cSAshish Patel 
38385c0bae8cSAshish Patel   Input Parameter:
38390b4b7b1cSBarry Smith . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
38405c0bae8cSAshish Patel 
38415c0bae8cSAshish Patel   Output Parameters:
38420b4b7b1cSBarry Smith + size  - local size of the array. The size of the array is non-zero only on MPI rank 0
38435c0bae8cSAshish Patel - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
38445c0bae8cSAshish Patel           for freeing this array.
38455c0bae8cSAshish Patel 
38465c0bae8cSAshish Patel   Level: beginner
38475c0bae8cSAshish Patel 
38481cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
38495c0bae8cSAshish Patel @*/
38505c0bae8cSAshish Patel PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
38515c0bae8cSAshish Patel {
38525c0bae8cSAshish Patel   PetscFunctionBegin;
38535c0bae8cSAshish Patel   PetscValidType(F, 1);
38545c0bae8cSAshish Patel   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
38554f572ea9SToby Isaac   PetscAssertPointer(size, 2);
38564f572ea9SToby Isaac   PetscAssertPointer(array, 3);
38575c0bae8cSAshish Patel   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
38585c0bae8cSAshish Patel   PetscFunctionReturn(PETSC_SUCCESS);
38595c0bae8cSAshish Patel }
38605c0bae8cSAshish Patel 
386124b6179bSKris Buschelman /*MC
38622692d6eeSBarry Smith   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
38630b4b7b1cSBarry Smith   MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
386424b6179bSKris Buschelman 
386511a5261eSBarry Smith   Works with `MATAIJ` and `MATSBAIJ` matrices
386624b6179bSKris Buschelman 
3867c2b89b5dSBarry Smith   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3868c2b89b5dSBarry Smith 
38692ef1f0ffSBarry Smith   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
38702ef1f0ffSBarry Smith   See details below.
3871217d3b1eSJunchao Zhang 
38722ef1f0ffSBarry Smith   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
3873c2b89b5dSBarry Smith 
387424b6179bSKris Buschelman   Options Database Keys:
38754422a9fcSPatrick Sanan +  -mat_mumps_icntl_1  - ICNTL(1): output stream for error messages
38764422a9fcSPatrick Sanan .  -mat_mumps_icntl_2  - ICNTL(2): output stream for diagnostic printing, statistics, and warning
38774422a9fcSPatrick Sanan .  -mat_mumps_icntl_3  - ICNTL(3): output stream for global information, collected on the host
38784422a9fcSPatrick Sanan .  -mat_mumps_icntl_4  - ICNTL(4): level of printing (0 to 4)
38794422a9fcSPatrick Sanan .  -mat_mumps_icntl_6  - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
3880b53c1a7fSBarry Smith .  -mat_mumps_icntl_7  - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3881b53c1a7fSBarry Smith                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
38824422a9fcSPatrick Sanan .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
38834422a9fcSPatrick Sanan .  -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
38844422a9fcSPatrick Sanan .  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
38854422a9fcSPatrick Sanan .  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
38864422a9fcSPatrick Sanan .  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
38874422a9fcSPatrick Sanan .  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
388845e3843bSPierre Jolivet .  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
38894422a9fcSPatrick Sanan .  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
389025aac85cSJunchao Zhang .  -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
38914422a9fcSPatrick Sanan .  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
38934422a9fcSPatrick Sanan .  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
38944422a9fcSPatrick Sanan .  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
3896fa6fd9d0SPierre Jolivet .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
38974422a9fcSPatrick Sanan .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
38984422a9fcSPatrick Sanan .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
38994422a9fcSPatrick Sanan .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
39004422a9fcSPatrick Sanan .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3901a0e18203SThibaut Appel .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3902a0e18203SThibaut Appel .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
390350ea2040Saszaboa .  -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3904a0e18203SThibaut Appel .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3905c92b4f89SPierre Jolivet .  -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3906*7cd49bdeStom.caruso .  -mat_mumps_icntl_49 - ICNTL(49): compact workarray at the end of factorization phase
3907146931dbSPierre Jolivet .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
39084422a9fcSPatrick Sanan .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
39094422a9fcSPatrick Sanan .  -mat_mumps_cntl_2   - CNTL(2): stopping criterion of refinement
39104422a9fcSPatrick Sanan .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
39114422a9fcSPatrick Sanan .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3912217d3b1eSJunchao Zhang .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3913a0e18203SThibaut Appel .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
3914217d3b1eSJunchao Zhang -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3915217d3b1eSJunchao Zhang                                     Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
391624b6179bSKris Buschelman 
391724b6179bSKris Buschelman   Level: beginner
391824b6179bSKris Buschelman 
391995452b02SPatrick Sanan   Notes:
39201d27aa22SBarry Smith   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
39212ef1f0ffSBarry Smith   error if the matrix is Hermitian.
392238548759SBarry Smith 
392326cc229bSBarry Smith   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
392426cc229bSBarry Smith   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
392526cc229bSBarry Smith 
39262ef1f0ffSBarry Smith   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
39272ef1f0ffSBarry Smith   the failure with
39282ef1f0ffSBarry Smith .vb
39292ef1f0ffSBarry Smith           KSPGetPC(ksp,&pc);
39302ef1f0ffSBarry Smith           PCFactorGetMatrix(pc,&mat);
39312ef1f0ffSBarry Smith           MatMumpsGetInfo(mat,....);
39322ef1f0ffSBarry Smith           MatMumpsGetInfog(mat,....); etc.
39332ef1f0ffSBarry Smith .ve
39342ef1f0ffSBarry Smith   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
39359fc87aa7SBarry Smith 
3936a5399872SJunchao Zhang   MUMPS provides 64-bit integer support in two build modes:
3937a5399872SJunchao Zhang   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3938a5399872SJunchao Zhang   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
39398fcaa860SBarry Smith 
3940a5399872SJunchao Zhang   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3941a5399872SJunchao Zhang   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3942a5399872SJunchao Zhang   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3943a5399872SJunchao Zhang   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3944a5399872SJunchao Zhang 
  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3946a5399872SJunchao Zhang 
3947a5399872SJunchao Zhang   Two modes to run MUMPS/PETSc with OpenMP
39482ef1f0ffSBarry Smith .vb
39490b4b7b1cSBarry Smith    Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
   threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
39512ef1f0ffSBarry Smith .ve
39528fcaa860SBarry Smith 
39532ef1f0ffSBarry Smith .vb
39540b4b7b1cSBarry Smith    `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
39552ef1f0ffSBarry Smith    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
39562ef1f0ffSBarry Smith .ve
39578fcaa860SBarry Smith 
39588fcaa860SBarry Smith    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
39592ef1f0ffSBarry Smith    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
39602ef1f0ffSBarry Smith    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
39618fcaa860SBarry Smith    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).
3963217d3b1eSJunchao Zhang 
39648fcaa860SBarry Smith    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3965217d3b1eSJunchao Zhang    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3966217d3b1eSJunchao Zhang    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We will free CPUs assigned to slaves (might be set
3968217d3b1eSJunchao Zhang    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
   In a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets.
   If you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind
   MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and the threads it spawns) will use half of the cores in socket 0 and half
   of the cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
39730b4b7b1cSBarry Smith    problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding.
39740b4b7b1cSBarry Smith    For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and
3975217d3b1eSJunchao Zhang    examine the mapping result.
3976217d3b1eSJunchao Zhang 
397711a5261eSBarry Smith    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
397811a5261eSBarry Smith    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
397911a5261eSBarry Smith    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3980217d3b1eSJunchao Zhang 
39811d27aa22SBarry Smith    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
3982217d3b1eSJunchao Zhang 
398393d70b8aSPierre Jolivet .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
398424b6179bSKris Buschelman M*/
398524b6179bSKris Buschelman 
3986d2a308c1SPierre Jolivet static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
3987d71ae5a4SJacob Faibussowitsch {
398835bd34faSBarry Smith   PetscFunctionBegin;
39892692d6eeSBarry Smith   *type = MATSOLVERMUMPS;
39903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
399135bd34faSBarry Smith }
399235bd34faSBarry Smith 
3993bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI AIJ matrices */
3994d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3995d71ae5a4SJacob Faibussowitsch {
39962877fffaSHong Zhang   Mat         B;
39972877fffaSHong Zhang   Mat_MUMPS  *mumps;
39984b9405b2SPierre Jolivet   PetscBool   isSeqAIJ, isDiag, isDense;
39992c7c0729SBarry Smith   PetscMPIInt size;
40002877fffaSHong Zhang 
40012877fffaSHong Zhang   PetscFunctionBegin;
4002eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
400303e5aca4SStefano Zampini   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
400403e5aca4SStefano Zampini     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
400503e5aca4SStefano Zampini     *F = NULL;
400603e5aca4SStefano Zampini     PetscFunctionReturn(PETSC_SUCCESS);
400703e5aca4SStefano Zampini   }
4008eb1ec7c1SStefano Zampini #endif
40092877fffaSHong Zhang   /* Create the factorization matrix */
40109566063dSJacob Faibussowitsch   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
4011c3e1b152SPierre Jolivet   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
40124b9405b2SPierre Jolivet   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
40139566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
40149566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4015d2a308c1SPierre Jolivet   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
40169566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
40172877fffaSHong Zhang 
40184dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&mumps));
40192205254eSKarl Rupp 
40202877fffaSHong Zhang   B->ops->view    = MatView_MUMPS;
402135bd34faSBarry Smith   B->ops->getinfo = MatGetInfo_MUMPS;
40222205254eSKarl Rupp 
40239566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
40249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
40259566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
40269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
40279566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
40289566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
40299566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
40309566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
40319566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
40329566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
40339566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
40345c0bae8cSAshish Patel   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
40359566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
40369566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
403793d70b8aSPierre Jolivet   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
40386444a565SStefano Zampini 
4039450b117fSShri Abhyankar   if (ftype == MAT_FACTOR_LU) {
4040450b117fSShri Abhyankar     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4041d5f3da31SBarry Smith     B->factortype            = MAT_FACTOR_LU;
4042bccb9932SShri Abhyankar     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
4043c3e1b152SPierre Jolivet     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
40444b9405b2SPierre Jolivet     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4045bccb9932SShri Abhyankar     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
40469566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4047746480a1SHong Zhang     mumps->sym = 0;
4048dcd589f8SShri Abhyankar   } else {
404967877ebaSShri Abhyankar     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4050450b117fSShri Abhyankar     B->factortype                  = MAT_FACTOR_CHOLESKY;
4051bccb9932SShri Abhyankar     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
4052c3e1b152SPierre Jolivet     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
40534b9405b2SPierre Jolivet     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4054bccb9932SShri Abhyankar     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
40559566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
405659ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX)
405759ac8732SStefano Zampini     mumps->sym = 2;
405859ac8732SStefano Zampini #else
4059b94d7dedSBarry Smith     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
40606fdc2a6dSBarry Smith     else mumps->sym = 2;
406159ac8732SStefano Zampini #endif
4062450b117fSShri Abhyankar   }
40632877fffaSHong Zhang 
406400c67f3bSHong Zhang   /* set solvertype */
40659566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
40669566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
40679566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
40682c7c0729SBarry Smith   if (size == 1) {
40694ac6704cSBarry Smith     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4070f73b0415SBarry Smith     B->canuseordering = PETSC_TRUE;
40712c7c0729SBarry Smith   }
40722877fffaSHong Zhang   B->ops->destroy = MatDestroy_MUMPS;
4073e69c285eSBarry Smith   B->data         = (void *)mumps;
40742205254eSKarl Rupp 
40752877fffaSHong Zhang   *F               = B;
4076413bcc21SPierre Jolivet   mumps->id.job    = JOB_NULL;
4077413bcc21SPierre Jolivet   mumps->ICNTL_pre = NULL;
4078413bcc21SPierre Jolivet   mumps->CNTL_pre  = NULL;
4079d47f36abSHong Zhang   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
40803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
40812877fffaSHong Zhang }
40822877fffaSHong Zhang 
4083bccb9932SShri Abhyankar /* MatGetFactor for Seq and MPI SBAIJ matrices */
4084d2a308c1SPierre Jolivet static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
4085d71ae5a4SJacob Faibussowitsch {
40862877fffaSHong Zhang   Mat         B;
40872877fffaSHong Zhang   Mat_MUMPS  *mumps;
4088ace3abfcSBarry Smith   PetscBool   isSeqSBAIJ;
40892c7c0729SBarry Smith   PetscMPIInt size;
40902877fffaSHong Zhang 
40912877fffaSHong Zhang   PetscFunctionBegin;
4092eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
409303e5aca4SStefano Zampini   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
409403e5aca4SStefano Zampini     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
409503e5aca4SStefano Zampini     *F = NULL;
409603e5aca4SStefano Zampini     PetscFunctionReturn(PETSC_SUCCESS);
409703e5aca4SStefano Zampini   }
4098eb1ec7c1SStefano Zampini #endif
40999566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
41009566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4101d2a308c1SPierre Jolivet   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
41029566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
4103e69c285eSBarry Smith 
41044dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&mumps));
41059566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
4106bccb9932SShri Abhyankar   if (isSeqSBAIJ) {
410716ebf90aSShri Abhyankar     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
4108dcd589f8SShri Abhyankar   } else {
4109bccb9932SShri Abhyankar     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
4110bccb9932SShri Abhyankar   }
4111bccb9932SShri Abhyankar 
411267877ebaSShri Abhyankar   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4113bccb9932SShri Abhyankar   B->ops->view                   = MatView_MUMPS;
4114722b6324SPierre Jolivet   B->ops->getinfo                = MatGetInfo_MUMPS;
41152205254eSKarl Rupp 
41169566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
41179566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
41189566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
41199566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
41209566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
41219566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
41229566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
41239566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
41249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
41259566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
41269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
41275c0bae8cSAshish Patel   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
41289566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
41299566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
413093d70b8aSPierre Jolivet   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
41312205254eSKarl Rupp 
4132f4762488SHong Zhang   B->factortype = MAT_FACTOR_CHOLESKY;
413359ac8732SStefano Zampini #if defined(PETSC_USE_COMPLEX)
413459ac8732SStefano Zampini   mumps->sym = 2;
413559ac8732SStefano Zampini #else
4136b94d7dedSBarry Smith   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
41376fdc2a6dSBarry Smith   else mumps->sym = 2;
413859ac8732SStefano Zampini #endif
4139a214ac2aSShri Abhyankar 
414000c67f3bSHong Zhang   /* set solvertype */
41419566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
41429566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
41439566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
41442c7c0729SBarry Smith   if (size == 1) {
41454ac6704cSBarry Smith     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4146f73b0415SBarry Smith     B->canuseordering = PETSC_TRUE;
41472c7c0729SBarry Smith   }
41489566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4149f3c0ef26SHong Zhang   B->ops->destroy = MatDestroy_MUMPS;
4150e69c285eSBarry Smith   B->data         = (void *)mumps;
41512205254eSKarl Rupp 
41522877fffaSHong Zhang   *F               = B;
4153413bcc21SPierre Jolivet   mumps->id.job    = JOB_NULL;
4154413bcc21SPierre Jolivet   mumps->ICNTL_pre = NULL;
4155413bcc21SPierre Jolivet   mumps->CNTL_pre  = NULL;
4156d47f36abSHong Zhang   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
41573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
41582877fffaSHong Zhang }
415997969023SHong Zhang 
4160d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
4161d71ae5a4SJacob Faibussowitsch {
416267877ebaSShri Abhyankar   Mat         B;
416367877ebaSShri Abhyankar   Mat_MUMPS  *mumps;
4164ace3abfcSBarry Smith   PetscBool   isSeqBAIJ;
41652c7c0729SBarry Smith   PetscMPIInt size;
416667877ebaSShri Abhyankar 
416767877ebaSShri Abhyankar   PetscFunctionBegin;
416867877ebaSShri Abhyankar   /* Create the factorization matrix */
41699566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
41709566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
41719566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4172d2a308c1SPierre Jolivet   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
41739566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
4174450b117fSShri Abhyankar 
41754dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&mumps));
4176966bd95aSPierre Jolivet   PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
4177450b117fSShri Abhyankar   B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
4178450b117fSShri Abhyankar   B->factortype            = MAT_FACTOR_LU;
4179bccb9932SShri Abhyankar   if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
4180bccb9932SShri Abhyankar   else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
4181746480a1SHong Zhang   mumps->sym = 0;
41829566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4183bccb9932SShri Abhyankar 
4184450b117fSShri Abhyankar   B->ops->view    = MatView_MUMPS;
4185722b6324SPierre Jolivet   B->ops->getinfo = MatGetInfo_MUMPS;
41862205254eSKarl Rupp 
41879566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
41889566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
41899566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
41909566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
41919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
41929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
41939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
41949566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
41959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
41969566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
41979566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
41985c0bae8cSAshish Patel   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
41999566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
42009566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
420193d70b8aSPierre Jolivet   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4202450b117fSShri Abhyankar 
420300c67f3bSHong Zhang   /* set solvertype */
42049566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
42059566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
42069566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
42072c7c0729SBarry Smith   if (size == 1) {
42084ac6704cSBarry Smith     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4209f73b0415SBarry Smith     B->canuseordering = PETSC_TRUE;
42102c7c0729SBarry Smith   }
42117ee00b23SStefano Zampini   B->ops->destroy = MatDestroy_MUMPS;
42127ee00b23SStefano Zampini   B->data         = (void *)mumps;
42137ee00b23SStefano Zampini 
42147ee00b23SStefano Zampini   *F               = B;
4215413bcc21SPierre Jolivet   mumps->id.job    = JOB_NULL;
4216413bcc21SPierre Jolivet   mumps->ICNTL_pre = NULL;
4217413bcc21SPierre Jolivet   mumps->CNTL_pre  = NULL;
4218d47f36abSHong Zhang   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
42193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
42207ee00b23SStefano Zampini }
42217ee00b23SStefano Zampini 
42227ee00b23SStefano Zampini /* MatGetFactor for Seq and MPI SELL matrices */
4223d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
4224d71ae5a4SJacob Faibussowitsch {
42257ee00b23SStefano Zampini   Mat         B;
42267ee00b23SStefano Zampini   Mat_MUMPS  *mumps;
42277ee00b23SStefano Zampini   PetscBool   isSeqSELL;
42282c7c0729SBarry Smith   PetscMPIInt size;
42297ee00b23SStefano Zampini 
42307ee00b23SStefano Zampini   PetscFunctionBegin;
42317ee00b23SStefano Zampini   /* Create the factorization matrix */
42329566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
42339566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
42349566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4235d2a308c1SPierre Jolivet   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
42369566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
42377ee00b23SStefano Zampini 
42384dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&mumps));
42397ee00b23SStefano Zampini 
42407ee00b23SStefano Zampini   B->ops->view    = MatView_MUMPS;
42417ee00b23SStefano Zampini   B->ops->getinfo = MatGetInfo_MUMPS;
42427ee00b23SStefano Zampini 
42439566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
42449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
42459566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
42469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
42479566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
42489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
42499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
42509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
42519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
42529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
42539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
42545c0bae8cSAshish Patel   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
42557ee00b23SStefano Zampini 
4256966bd95aSPierre Jolivet   PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
42577ee00b23SStefano Zampini   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
42587ee00b23SStefano Zampini   B->factortype            = MAT_FACTOR_LU;
4259966bd95aSPierre Jolivet   PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4260966bd95aSPierre Jolivet   mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
42617ee00b23SStefano Zampini   mumps->sym              = 0;
42629566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
42637ee00b23SStefano Zampini 
42647ee00b23SStefano Zampini   /* set solvertype */
42659566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
42669566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
42679566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
42682c7c0729SBarry Smith   if (size == 1) {
42694ac6704cSBarry Smith     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
4270f73b0415SBarry Smith     B->canuseordering = PETSC_TRUE;
42712c7c0729SBarry Smith   }
4272450b117fSShri Abhyankar   B->ops->destroy = MatDestroy_MUMPS;
4273e69c285eSBarry Smith   B->data         = (void *)mumps;
42742205254eSKarl Rupp 
4275450b117fSShri Abhyankar   *F               = B;
4276413bcc21SPierre Jolivet   mumps->id.job    = JOB_NULL;
4277413bcc21SPierre Jolivet   mumps->ICNTL_pre = NULL;
4278413bcc21SPierre Jolivet   mumps->CNTL_pre  = NULL;
4279d47f36abSHong Zhang   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
42803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4281450b117fSShri Abhyankar }
428242c9c57cSBarry Smith 
42839d0448ceSStefano Zampini /* MatGetFactor for MATNEST matrices */
42849d0448ceSStefano Zampini static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
42859d0448ceSStefano Zampini {
42869d0448ceSStefano Zampini   Mat         B, **mats;
42879d0448ceSStefano Zampini   Mat_MUMPS  *mumps;
42889d0448ceSStefano Zampini   PetscInt    nr, nc;
42899d0448ceSStefano Zampini   PetscMPIInt size;
429003e5aca4SStefano Zampini   PetscBool   flg = PETSC_TRUE;
42919d0448ceSStefano Zampini 
42929d0448ceSStefano Zampini   PetscFunctionBegin;
42939d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX)
429403e5aca4SStefano Zampini   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
429503e5aca4SStefano Zampini     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
429603e5aca4SStefano Zampini     *F = NULL;
429703e5aca4SStefano Zampini     PetscFunctionReturn(PETSC_SUCCESS);
429803e5aca4SStefano Zampini   }
42999d0448ceSStefano Zampini #endif
43009d0448ceSStefano Zampini 
430103e5aca4SStefano Zampini   /* Return if some condition is not satisfied */
430203e5aca4SStefano Zampini   *F = NULL;
43039d0448ceSStefano Zampini   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
43049d0448ceSStefano Zampini   if (ftype == MAT_FACTOR_CHOLESKY) {
43059d0448ceSStefano Zampini     IS       *rows, *cols;
43069d0448ceSStefano Zampini     PetscInt *m, *M;
43079d0448ceSStefano Zampini 
43089d0448ceSStefano Zampini     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
43099d0448ceSStefano Zampini     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
43109d0448ceSStefano Zampini     PetscCall(MatNestGetISs(A, rows, cols));
43119d0448ceSStefano Zampini     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
431203e5aca4SStefano Zampini     if (!flg) {
431303e5aca4SStefano Zampini       PetscCall(PetscFree2(rows, cols));
431403e5aca4SStefano Zampini       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
431503e5aca4SStefano Zampini       PetscFunctionReturn(PETSC_SUCCESS);
431603e5aca4SStefano Zampini     }
43179d0448ceSStefano Zampini     PetscCall(PetscMalloc2(nr, &m, nr, &M));
43189d0448ceSStefano Zampini     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
43199d0448ceSStefano Zampini     for (PetscInt r = 0; flg && r < nr; r++)
43209d0448ceSStefano Zampini       for (PetscInt k = r + 1; flg && k < nr; k++)
43219d0448ceSStefano Zampini         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
43229d0448ceSStefano Zampini     PetscCall(PetscFree2(m, M));
43239d0448ceSStefano Zampini     PetscCall(PetscFree2(rows, cols));
432403e5aca4SStefano Zampini     if (!flg) {
432503e5aca4SStefano Zampini       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
432603e5aca4SStefano Zampini       PetscFunctionReturn(PETSC_SUCCESS);
432703e5aca4SStefano Zampini     }
43289d0448ceSStefano Zampini   }
43299d0448ceSStefano Zampini 
43309d0448ceSStefano Zampini   for (PetscInt r = 0; r < nr; r++) {
43319d0448ceSStefano Zampini     for (PetscInt c = 0; c < nc; c++) {
43329d0448ceSStefano Zampini       Mat       sub = mats[r][c];
433353587d93SPierre Jolivet       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
43349d0448ceSStefano Zampini 
43359d0448ceSStefano Zampini       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
433653587d93SPierre Jolivet       PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
43379d0448ceSStefano Zampini       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
43389d0448ceSStefano Zampini       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
43399d0448ceSStefano Zampini       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
43409d0448ceSStefano Zampini       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
43419d0448ceSStefano Zampini       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
43429d0448ceSStefano Zampini       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
4343c3e1b152SPierre Jolivet       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
43444b9405b2SPierre Jolivet       PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
43459d0448ceSStefano Zampini       if (ftype == MAT_FACTOR_CHOLESKY) {
4346dcab004fSPierre Jolivet         if (r == c) {
43474b9405b2SPierre Jolivet           if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
434840afc089SBarry Smith             PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
434903e5aca4SStefano Zampini             flg = PETSC_FALSE;
4350dcab004fSPierre Jolivet           }
43514b9405b2SPierre Jolivet         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
435240afc089SBarry Smith           PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
435303e5aca4SStefano Zampini           flg = PETSC_FALSE;
435403e5aca4SStefano Zampini         }
43554b9405b2SPierre Jolivet       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
43569afb9c56SPierre Jolivet         PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
435703e5aca4SStefano Zampini         flg = PETSC_FALSE;
43589d0448ceSStefano Zampini       }
43599d0448ceSStefano Zampini     }
436003e5aca4SStefano Zampini   }
436103e5aca4SStefano Zampini   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
43629d0448ceSStefano Zampini 
43639d0448ceSStefano Zampini   /* Create the factorization matrix */
43649d0448ceSStefano Zampini   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
43659d0448ceSStefano Zampini   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
43669d0448ceSStefano Zampini   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
43679d0448ceSStefano Zampini   PetscCall(MatSetUp(B));
43689d0448ceSStefano Zampini 
43699d0448ceSStefano Zampini   PetscCall(PetscNew(&mumps));
43709d0448ceSStefano Zampini 
43719d0448ceSStefano Zampini   B->ops->view    = MatView_MUMPS;
43729d0448ceSStefano Zampini   B->ops->getinfo = MatGetInfo_MUMPS;
43739d0448ceSStefano Zampini 
43749d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
43759d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
43769d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
43779d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
43789d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
43799d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
43809d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
43819d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
43829d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
43839d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
43849d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
43859d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
43869d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
43879d0448ceSStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
438893d70b8aSPierre Jolivet   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
43899d0448ceSStefano Zampini 
43909d0448ceSStefano Zampini   if (ftype == MAT_FACTOR_LU) {
43919d0448ceSStefano Zampini     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
43929d0448ceSStefano Zampini     B->factortype            = MAT_FACTOR_LU;
43939d0448ceSStefano Zampini     mumps->sym               = 0;
43949d0448ceSStefano Zampini   } else {
43959d0448ceSStefano Zampini     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
43969d0448ceSStefano Zampini     B->factortype                  = MAT_FACTOR_CHOLESKY;
43979d0448ceSStefano Zampini #if defined(PETSC_USE_COMPLEX)
43989d0448ceSStefano Zampini     mumps->sym = 2;
43999d0448ceSStefano Zampini #else
44009d0448ceSStefano Zampini     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
44019d0448ceSStefano Zampini     else mumps->sym = 2;
44029d0448ceSStefano Zampini #endif
44039d0448ceSStefano Zampini   }
44049d0448ceSStefano Zampini   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
44059d0448ceSStefano Zampini   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
44069d0448ceSStefano Zampini 
44079d0448ceSStefano Zampini   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
44089d0448ceSStefano Zampini   if (size == 1) {
44099d0448ceSStefano Zampini     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
44109d0448ceSStefano Zampini     B->canuseordering = PETSC_TRUE;
44119d0448ceSStefano Zampini   }
44129d0448ceSStefano Zampini 
44139d0448ceSStefano Zampini   /* set solvertype */
44149d0448ceSStefano Zampini   PetscCall(PetscFree(B->solvertype));
44159d0448ceSStefano Zampini   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
44169d0448ceSStefano Zampini   B->ops->destroy = MatDestroy_MUMPS;
44179d0448ceSStefano Zampini   B->data         = (void *)mumps;
44189d0448ceSStefano Zampini 
44199d0448ceSStefano Zampini   *F               = B;
44209d0448ceSStefano Zampini   mumps->id.job    = JOB_NULL;
44219d0448ceSStefano Zampini   mumps->ICNTL_pre = NULL;
44229d0448ceSStefano Zampini   mumps->CNTL_pre  = NULL;
44239d0448ceSStefano Zampini   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
44249d0448ceSStefano Zampini   PetscFunctionReturn(PETSC_SUCCESS);
44259d0448ceSStefano Zampini }
44269d0448ceSStefano Zampini 
4427d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
4428d71ae5a4SJacob Faibussowitsch {
442942c9c57cSBarry Smith   PetscFunctionBegin;
44309566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
44319566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
44329566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
44339566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
44349566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
44359566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
44369566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
44379566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
44389566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
44399566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
44409566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
4441c3e1b152SPierre Jolivet   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4442c3e1b152SPierre Jolivet   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
44434b9405b2SPierre Jolivet   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
44444b9405b2SPierre Jolivet   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
44454b9405b2SPierre Jolivet   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
44464b9405b2SPierre Jolivet   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
44479d0448ceSStefano Zampini   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
44489d0448ceSStefano Zampini   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
44493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
445042c9c57cSBarry Smith }
4451